UW44 Patient Slice Culture Analysis pt.2

#Loading the Data

library(Seurat)

## Attaching SeuratObject

library(sctransform)
library(MAST)

## Loading required package: SingleCellExperiment

## Loading required package: SummarizedExperiment

## Loading required package: MatrixGenerics

## Loading required package: matrixStats

## 
## Attaching package: 'MatrixGenerics'

## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## Loading required package: GenomeInfoDb

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians

## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'SummarizedExperiment'

## The following object is masked from 'package:SeuratObject':
## 
##     Assays

## The following object is masked from 'package:Seurat':
## 
##     Assays

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ ggplot2 3.4.2     ✔ purrr   1.0.1
## ✔ tibble  3.2.1     ✔ dplyr   1.1.2
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()

library(rcna)
library(openxlsx)
library(HGNChelper)
library(harmony)

## Loading required package: Rcpp

library(glmGamPoi)

## 
## Attaching package: 'glmGamPoi'
## 
## The following object is masked from 'package:dplyr':
## 
##     vars
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars

library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats)

setwd('/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx')

slice = readRDS("annotatedUW44Data.RDS")

#Plotting the Data By Various Variables

slice$Time <- factor(slice$Time, levels = c("D0", "D2", "D4","D5","D7","D10","D12","D14","D16"))
DimPlot(slice, split.by= "Condition", group.by="Time", label=FALSE, pt.size = 0.7)

DimPlot(slice, split.by= "Time", group.by="Condition", label=FALSE, pt.size = 0.7)

Of note, we can see that the only time points that exist wherein there is data available for all three conditions are D7 and D10.

#Evolution of cell proportions over time

new.cluster.ids <- c("Tumor","Tumor","Tumor", "Radial Glial Cells","Tumor", "Microglia","Oligodendrocytes","Microglia", "Neuroepithelial Cells","Astrocytes","Oligodendrocytes","Pericytes","Microglia","Endothelial Cells","Neurons","Astrocytes","Neurons")
names(new.cluster.ids) <- levels(slice)
slice <- RenameIdents(slice, new.cluster.ids)
slice$AnnotedClustedShallow <- Idents(slice)

Idents(slice)<-'Condition'
slicectrl<-subset(slice, idents=c('Ctrl'))
Idents(slicectrl)<-'AnnotedClustedShallow'

tablectrl44 <- as.data.frame(table(slicectrl@meta.data$AnnotedClustedShallow, slicectrl@meta.data$Time))
tablectrl44 <- dcast(tablectrl44, Var1 ~ Var2)

## Using Freq as value column: use value.var to override.

rownames(tablectrl44) <- tablectrl44[,1]
tablectrl44[,1] <- NULL
data_percentage <- apply(tablectrl44, 2, function(x){x*100/sum(x,na.rm=T)})
tablectrl44 <- data_percentage[,-c(3,7,9)]
tablectrl44<- melt(tablectrl44)
tablectrl44$Var1 <- as.factor(tablectrl44$Var1)
ggplot(tablectrl44, aes(x = Var2, y = value, fill = Var1)) +
  geom_bar(stat = "Identity", alpha = 0.9, color = "black")+
  xlab("Time") +
  ylab("Proportion of Slice (%)")

Idents(slicectrl)<-'clusterlabels'

tablectrl44 <- as.data.frame(table(slicectrl@meta.data$clusterlabels, slicectrl@meta.data$Time))
tablectrl44 <- dcast(tablectrl44, Var1 ~ Var2)

## Using Freq as value column: use value.var to override.

rownames(tablectrl44) <- tablectrl44[,1]
tablectrl44[,1] <- NULL
data_percentage <- apply(tablectrl44, 2, function(x){x*100/sum(x,na.rm=T)})
tablectrl44 <- data_percentage[,-c(3,7,9)]
tablectrl44<- melt(tablectrl44)
tablectrl44$Var1 <- as.factor(tablectrl44$Var1)
ggplot(tablectrl44, aes(x = Var2, y = value, fill = Var1)) +
  geom_bar(stat = "Identity", alpha = 0.9, color = "black")+
  xlab("Time") +
  ylab("Proportion of Slice (%)")

# Determining Cell Stress Signatures over Time https://bioconductor.org/packages/devel/bioc/vignettes/UCell/inst/doc/UCell_Seurat.html#1_Introduction

signatures <- list(
    ReactomeCellularResponseToChemicalStress = c("GCLC","CREBBP","PSMB1","BRCA1","PSMC4","GCLM","CUL3","PSMA4","CYBA","CUL1","ME1","PSME4","UFD1","CSNK2A2","KEAP1","SESN1","SMARCD3","GSK3B","PALB2","GSTP1","NCOA1","NOX4","PSMC5","BLVRB","MUL1","PSME1","PSMD5","PSMD8","HMOX1","TXN2","NCF4","RBX1","EP300","PSMC6","PSMA3","PSMC1","PSMB5","PSMA6","PSME2","PSMA7","PTK6","CSNK2A1","HM13","PSMD10","TBL1X","PSMD7","ABCC1","AQP8","HMOX2","GSR","AKT2","PSMA2","BLVRA","PSMD3","PSMD11","SOD3","PSMD9","COX6A1","SKP1","PSMD14","COX7A2L","NFE2L2","GPX7","NCF2","AKT3","PRDX1","PRDX6","CAT","CDKN1A","MED1","PSMF1","PSMB2","COX6B1","PRDX5","COX7C","SIN3B","SEM1","ATF4","PSMA1","HELZ2","SESN2","COX4I1","COX7B","PSME3","TRIM21","SCO1","COX16","COX5B","TACO1","TXN","PSMB7","MYC","TGS1","LRPPRC","IDH1","NCOA2","MAP1LC3B","NCOR1","SOD1","AKT1","CARM1","PSMB6","PGD","PSMA5","RPS27A","SKP2","FBXL17","CDKN2A","SURF1","NOTCH1","UBC","SLC7A11","PSMA8","BACH1","NCF1","PSMD4","PSMB4","G6PD","SQSTM1","PSMC2","NLRP3","FABP1","COX18","ALB","PSMD6","TKT","PRKCD","UBXN7","GPX8","COX6C","NUDT2","CYBB","ATP7A","VCP","PRDX3","PSMC3","BTRC","COX11","PRDX2","STAT3","SIN3A","UBB","HDAC3","CYCS","PSMD1","CCS","GSTA3","PSMD2","GPX2","COX8A","TALDO1","CHD9","ATOX1","TBL1XR1","STAP2","COX14","COX5A","NQO1","NPLOC4","TXNRD2","AMER1","P4HB","PSMD13","RXRA","PPARA","HBA2","NDUFA4","NCOR2","MAFG","PSMD12","ERO1A","TXNRD1","MAFK","NCOA6","GPX6","MT-CO2","MT-CO1","MT-CO3","COX20","PSMB8","CSNK2B","PSMB10","HBA1","PSMB8","CSNK2B","GPX3","UBA52","PSMB11","CSNK2B","GPX5","CSNK2B","PSMB8","CSNK2B","PSMB8","PSMB8","CSNK2B","PSMB8","CSNK2B","GPX1","PSMB8","PSMB8","PSMB9","PSMB9","PSMB9","COX19","PSMB9","PSMB9","PSMB9","PSMB9","GSTA1","PSMB9","HBB","DPP3","NOX5","TXNIP","SRXN1","AKT3","PSMB3","HMOX2","PSMB3","ABCC1","SURF1","PSMC4","SCO2","PSME2","PSME1","SESN2","NOX5","SOD2"),
    BiocartaStressResponse = c("TNF","TNFRSF1A","IKBKG","IKBKG","RELA","IKBKG","NFKB1","IKBKB","TANK","CASP2","IKBKB","RELA","RELA","CHUK","MAPK14","IKBKB","JUN","MAPK8","MAP2K3","MAP2K6","MAP2K4","IKBKG","TRADD","RIPK1","CRADD","MAP3K14","NFKB1","TANK","MAP4K2","ATF1","MAP3K1","NFKBIA","TRAF2","RELA","CASP2","CASP2","TANK","MAPK14","MAPK14","MAPK14","MAPK8","MAPK8","MAP2K3"),
    NatureISR = c("ASNS","PTX3","SLC7A5","LGALS3","SLC1A5","APBA3","MTHFD2","PMP22","MRPS7","NID2","EIF4EBP1","IGFBP2","NARS","OSMR","WARS","TNFRSF11B","BCAT1","RHOJ","SLC3A2","CCN4","CPO","GRB10","ERO1L","LONP1","HMOX1","SERPINF1","CEBPB","GYS1","PRRX2","CLIC4","CEBPG","HIVEP2"))


library(UCell)

## Warning: package 'UCell' was built under R version 4.2.3

library(Seurat)

seurat.object <- AddModuleScore_UCell(slicectrl, 
                                      features=signatures, name=NULL)

## Warning: The following genes were not found and will be
##                         imputed to exp=0:
## * FABP1,GPX6,PSMB11,GPX5,GPX1,GSTA1,ERO1L

FeaturePlot(seurat.object, reduction = "umap", features = names(signatures),split.by = "Time")

#Setting Up CNA Object

Idents(slice)<-'Time'
slice7and10<-subset(slice, idents=c('D7','D10'))


library(harmony)
library(Seurat)
library(sctransform)
library(MAST)
library(tidyverse)
library(rcna)
library(glmGamPoi)
library(reshape2)
library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats)
library(glue)

## 
## Attaching package: 'glue'

## The following object is masked from 'package:SummarizedExperiment':
## 
##     trim

## The following object is masked from 'package:GenomicRanges':
## 
##     trim

## The following object is masked from 'package:IRanges':
## 
##     trim

library(ggthemes)

## 
## Attaching package: 'ggthemes'

## The following object is masked from 'package:cowplot':
## 
##     theme_map

slice7and10@meta.data$ConditionNum <- as.numeric(factor(slice7and10@meta.data$Condition, c('Ctrl', 'Ivosidenib','XRT/TMZ')))
Idents(slice7and10)<-'Condition'

#CNA Analysis of Ivo vs Ctrl

ivo<-subset(slice7and10, idents=c('Ctrl','Ivosidenib'))
ivo<-FindNeighbors(ivo,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

ivo<-association.Seurat(
  seurat_object = ivo, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(ivo, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivo@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(ivo, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(ivo, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(ivo, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

#CNA Analysis of XRT/TMZ vs Ctrl

xrt<-subset(slice7and10, idents=c('Ctrl','XRT/TMZ'))
xrt<-FindNeighbors(xrt,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

xrt<-association.Seurat(
  seurat_object = xrt, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(xrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', xrt@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(xrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(xrt, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(xrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

#CNA Analysis of XRT/TMZ vs Ivo

ivovsxrt<-subset(slice7and10, idents=c('Ivosidenib','XRT/TMZ'))
ivovsxrt<-FindNeighbors(ivovsxrt,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

ivovsxrt<-association.Seurat(
  seurat_object = ivovsxrt, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(ivovsxrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivovsxrt@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(ivovsxrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(ivovsxrt, features = c("cna_ncorrs_fdr10")): All cells
## have the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(ivovsxrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

#DEG between Ivo vs Ctrl

library(cowplot)
ivotumor <- SetIdent(ivo, value = "clusterlabels")
ivotumor <- subset(x = ivotumor, idents = c("Tumor c1","Tumor c2","Tumor c3","Tumor c4"))
ivotimes <- SetIdent(ivotumor, value = "Time")
var1 = c("D7")
var2 = c("D10")
ivoD7 <- subset(x = ivotimes, idents = var1)
ivoD10 <- subset(x = ivotimes, idents = var2)

ivoD7 <- SetIdent(ivoD7, value = "Condition")
ivoD7_1 <- FindMarkers(ivoD7, ident.1 = "Ivosidenib", ident.2 = "Ctrl", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
ivoD7_1$gene <- rownames(ivoD7_1)
ivoD7_1$Expression <- rownames(ivoD7_1)
ivoD7_1 <- ivoD7_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  ivoD7_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  ivoD7_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(ivoD7_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

###

ivoD10 <- SetIdent(ivoD10, value = "Condition")
ivoD10_1 <- FindMarkers(ivoD10, ident.1 = "Ivosidenib", ident.2 = "Ctrl", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
ivoD10_1$gene <- rownames(ivoD10_1)
ivoD10_1$Expression <- rownames(ivoD10_1)
ivoD10_1 <- ivoD10_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  ivoD10_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  ivoD10_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(ivoD10_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

p

#DEG between XRT vs Ctrl

library(cowplot)
xrttumor <- SetIdent(xrt, value = "clusterlabels")
xrttumor <- subset(x = xrttumor, idents = c("Tumor c1","Tumor c2","Tumor c3","Tumor c4"))
xrttimes <- SetIdent(xrttumor, value = "Time")
var1 = c("D7")
var2 = c("D10")
xrtD7 <- subset(x = xrttimes, idents = var1)
xrtD10 <- subset(x = xrttimes, idents = var2)

xrtD7 <- SetIdent(xrtD7, value = "Condition")
xrtD7_1 <- FindMarkers(xrtD7, ident.1 = "XRT/TMZ", ident.2 = "Ctrl", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
xrtD7_1$gene <- rownames(xrtD7_1)
xrtD7_1$Expression <- rownames(xrtD7_1)
xrtD7_1 <- xrtD7_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  xrtD7_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  xrtD7_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(xrtD7_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

p

###

xrtD10 <- SetIdent(xrtD10, value = "Condition")
xrtD10_1 <- FindMarkers(xrtD10, ident.1 = "XRT/TMZ", ident.2 = "Ctrl", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
xrtD10_1$gene <- rownames(xrtD10_1)
xrtD10_1$Expression <- rownames(xrtD10_1)
xrtD10_1 <- xrtD10_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  xrtD10_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  xrtD10_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(xrtD10_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

p

#DEG between XRT vs Ivo

library(cowplot)
ivovsxrttumor <- SetIdent(ivovsxrt, value = "clusterlabels")
ivovsxrttumor <- subset(x = ivovsxrttumor, idents = c("Tumor c1","Tumor c2","Tumor c3","Tumor c4"))
ivovsxrttimes <- SetIdent(ivovsxrttumor, value = "Time")
var1 = c("D7")
var2 = c("D10")
ivovsxrtD7 <- subset(x = ivovsxrttimes, idents = var1)
ivovsxrtD10 <- subset(x = ivovsxrttimes, idents = var2)

ivovsxrtD7 <- SetIdent(ivovsxrtD7, value = "Condition")
ivovsxrtD7_1 <- FindMarkers(ivovsxrtD7, ident.1 = "XRT/TMZ", ident.2 = "Ivosidenib", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
ivovsxrtD7_1$gene <- rownames(ivovsxrtD7_1)
ivovsxrtD7_1$Expression <- rownames(ivovsxrtD7_1)
ivovsxrtD7_1 <- ivovsxrtD7_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  ivovsxrtD7_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  ivovsxrtD7_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(ivovsxrtD7_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

p

###

ivovsxrtD10 <- SetIdent(ivovsxrtD10, value = "Condition")
ivovsxrtD10_1 <- FindMarkers(ivovsxrtD10, ident.1 = "XRT/TMZ", ident.2 = "Ivosidenib", verbose = FALSE, min.pct = 0.1, test.use = "wilcox", logfc.threshold = 0.1)
ivovsxrtD10_1$gene <- rownames(ivovsxrtD10_1)
ivovsxrtD10_1$Expression <- rownames(ivovsxrtD10_1)
ivovsxrtD10_1 <- ivovsxrtD10_1 %>% 
  mutate(
    Expression = case_when(avg_log2FC >= 0.5 & p_val_adj <= 0.0001 ~ "Up-regulated",
                           avg_log2FC <= -0.5 & p_val_adj <= 0.0001 ~ "Down-regulated",
                           TRUE ~ "Unchanged"))

top <- 25
top_genes <- bind_rows(
  ivovsxrtD10_1 %>% 
    filter(Expression == 'Up-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top),
  ivovsxrtD10_1 %>% 
    filter(Expression == 'Down-regulated') %>% 
    arrange(p_val_adj, desc(abs(avg_log2FC))) %>% 
    head(top)
)


library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats) 
p=
  ggplot(ivovsxrtD10_1, aes(x=avg_log2FC, y=-log10(p_val_adj))) +
  geom_point(aes(color = Expression), size = 1, alpha=0.8)+
  xlab("Log 2 Fold Change") +
  ylab("-Log10 Adjusted P-Value")+
  scale_color_manual(values = c("dodgerblue3", "gray50", "sienna2")) +
  guides(colour = guide_legend(override.aes = list(size=1.5)))+
  geom_hline(yintercept=4, linetype="dashed",
             color = "black", size=0.5)+
  geom_vline(xintercept = -0.5, linetype="dotdash",
             color = "black", size=0.5)+
  geom_vline(xintercept = 0.5, linetype="dotdash",
             color = "black", size=0.5)+
  theme(plot.title = element_text(hjust = 0.5,face = "plain"))+
  theme(legend.position = "none")+
  geom_label_repel(data = top_genes,
                   mapping = aes(avg_log2FC, -log10(p_val_adj), label = gene),
                   size = 4.5)

p

#Running the ReactomePA Analysis on All Comparisons

library(org.Hs.eg.db)

## Loading required package: AnnotationDbi

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:dplyr':
## 
##     select

##

library(ReactomePA)

##

## ReactomePA v1.42.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479

library(clusterProfiler)

## clusterProfiler v4.6.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141

## 
## Attaching package: 'clusterProfiler'

## The following object is masked from 'package:AnnotationDbi':
## 
##     select

## The following object is masked from 'package:purrr':
## 
##     simplify

## The following object is masked from 'package:IRanges':
## 
##     slice

## The following object is masked from 'package:S4Vectors':
## 
##     rename

## The following object is masked from 'package:stats':
## 
##     filter

#Ivo vs Ctrl 
## D7
ReactomaPAData = ivoD7_1 %>% filter(ivoD7_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

## [1] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"

## D10
ReactomaPAData = ivoD10_1 %>% filter(ivoD10_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

## [1] "Extracellular matrix organization"                                                                                          
## [2] "Degradation of the extracellular matrix"                                                                                    
## [3] "Scavenging by Class A Receptors"                                                                                            
## [4] "Post-translational protein phosphorylation"                                                                                 
## [5] "Circadian Clock"                                                                                                            
## [6] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
## [7] "Non-integrin membrane-ECM interactions"

##To look at any of these pathways in dot plot format we would use the following format:
#x1 <- x1[c(1,2,3,4,5,6,7,8,9,10)]
#dotplot(x, showCategory=x1)

Of interest we can see that there is an increase in MHC processing and presentation in the tumor cells treated with Ivo at Day 7.

#Xrt vs Ctrl

library(org.Hs.eg.db)
library(ReactomePA)
library(clusterProfiler)

#Ivo vs Ctrl 
## D7
ReactomaPAData = xrtD7_1 %>% filter(xrtD7_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

##  [1] "Resolution of D-loop Structures through Holliday Junction Intermediates"                                                    
##  [2] "Resolution of D-Loop Structures"                                                                                            
##  [3] "Impaired BRCA2 binding to PALB2"                                                                                            
##  [4] "Defective HDR through Homologous Recombination (HRR) due to BRCA1 loss-of-function"                                         
##  [5] "Defective HDR through Homologous Recombination (HRR) due to PALB2 loss of function"                                         
##  [6] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA1 binding function"                    
##  [7] "Defective HDR through Homologous Recombination Repair (HRR) due to PALB2 loss of BRCA2/RAD51/RAD51C binding function"       
##  [8] "HDR through Homologous Recombination (HRR)"                                                                                 
##  [9] "Diseases of DNA Double-Strand Break Repair"                                                                                 
## [10] "Defective homologous recombination repair (HRR) due to BRCA2 loss of function"                                              
## [11] "Diseases of DNA repair"                                                                                                     
## [12] "Homologous DNA Pairing and Strand Exchange"                                                                                 
## [13] "Resolution of D-loop Structures through Synthesis-Dependent Strand Annealing (SDSA)"                                        
## [14] "Cell Cycle Checkpoints"                                                                                                     
## [15] "Regulation of commissural axon pathfinding by SLIT and ROBO"                                                                
## [16] "Presynaptic phase of homologous DNA pairing and strand exchange"                                                            
## [17] "RHO GTPases Activate Formins"                                                                                               
## [18] "Activation of the pre-replicative complex"                                                                                  
## [19] "RHOF GTPase cycle"                                                                                                          
## [20] "Amplification of signal from the kinetochores"                                                                              
## [21] "Amplification  of signal from unattached  kinetochores via a MAD2  inhibitory signal"                                       
## [22] "DNA Repair"                                                                                                                 
## [23] "Post-translational protein phosphorylation"                                                                                 
## [24] "Mitotic G1 phase and G1/S transition"                                                                                       
## [25] "Homology Directed Repair"                                                                                                   
## [26] "CRMPs in Sema3A signaling"                                                                                                  
## [27] "Mitotic Spindle Checkpoint"                                                                                                 
## [28] "DNA Double-Strand Break Repair"                                                                                             
## [29] "RHOD GTPase cycle"                                                                                                          
## [30] "ATF6 (ATF6-alpha) activates chaperone genes"                                                                                
## [31] "Acetylcholine regulates insulin secretion"                                                                                  
## [32] "Impaired BRCA2 binding to RAD51"                                                                                            
## [33] "EML4 and NUDC in mitotic spindle formation"                                                                                 
## [34] "HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA)"                                                
## [35] "Activation of ATR in response to replication stress"                                                                        
## [36] "Kinesins"                                                                                                                   
## [37] "ATF6 (ATF6-alpha) activates chaperones"                                                                                     
## [38] "Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)"
## [39] "Antigen Presentation: Folding, assembly and peptide loading of class I MHC"

## D10
ReactomaPAData = xrtD10_1 %>% filter(xrtD10_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

## [1] "Transcriptional regulation by RUNX2"              
## [2] "Defective B3GALTL causes PpS"                     
## [3] "O-glycosylation of TSR domain-containing proteins"
## [4] "RUNX2 regulates osteoblast differentiation"       
## [5] "O-linked glycosylation"                           
## [6] "RUNX2 regulates bone development"

##To look at any of these pathways in dot plot format we would use the following format:
#x1 <- x1[c(1,2,3,4,5,6,7,8,9,10)]
#dotplot(x, showCategory=x1)

Unsurprisingly tumor cells treated with SOC seem to show an upregualtion in DNA repair pathways at D7. At D10 it seems that the pathways that are enriched are primarily related to PTMs.

#Ivo vs Xrt

library(org.Hs.eg.db)
library(ReactomePA)
library(clusterProfiler)

#Ivo vs Xrt 
## D7
ReactomaPAData = ivovsxrtD7_1 %>% filter(ivovsxrtD7_1$avg_log2FC < 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

## [1] "Neurexins and neuroligins"               
## [2] "RHO GTPase cycle"                        
## [3] "Protein-protein interactions at synapses"
## [4] "RHOA GTPase cycle"

## D10
ReactomaPAData = ivovsxrtD10_1 %>% filter(ivovsxrtD10_1$avg_log2FC < 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

##  [1] "RHO GTPase cycle"                               
##  [2] "CDC42 GTPase cycle"                             
##  [3] "Protein-protein interactions at synapses"       
##  [4] "RAC1 GTPase cycle"                              
##  [5] "Neuronal System"                                
##  [6] "IL-6-type cytokine receptor ligand interactions"
##  [7] "Transmission across Chemical Synapses"          
##  [8] "Neurexins and neuroligins"                      
##  [9] "RHOA GTPase cycle"                              
## [10] "Extracellular matrix organization"              
## [11] "Interleukin-6 family signaling"                 
## [12] "RHOB GTPase cycle"                              
## [13] "RAB GEFs exchange GTP for GDP on RABs"          
## [14] "Non-integrin membrane-ECM interactions"         
## [15] "Integration of energy metabolism"               
## [16] "Ca2+ pathway"                                   
## [17] "Signaling by NOTCH1"                            
## [18] "Signal attenuation"

# Xrt vs Ivo  
## D7
ReactomaPAData = ivovsxrtD7_1 %>% filter(ivovsxrtD7_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

##  [1] "Neuronal System"                                            
##  [2] "Protein-protein interactions at synapses"                   
##  [3] "Regulation of commissural axon pathfinding by SLIT and ROBO"
##  [4] "L1CAM interactions"                                         
##  [5] "ECM proteoglycans"                                          
##  [6] "Netrin-1 signaling"                                         
##  [7] "NCAM signaling for neurite out-growth"                      
##  [8] "DSCAM interactions"                                         
##  [9] "Myogenesis"                                                 
## [10] "NCAM1 interactions"                                         
## [11] "Neurexins and neuroligins"                                  
## [12] "Presynaptic depolarization and calcium channel opening"     
## [13] "Voltage gated Potassium channels"                           
## [14] "Signal transduction by L1"

## D10
ReactomaPAData = ivovsxrtD10_1 %>% filter(ivovsxrtD10_1$avg_log2FC > 0)
ReactomaPAData = data.frame(ReactomaPAData$gene)
ReactomaPAData$Gene <- mapIds(org.Hs.eg.db, keys = gsub("\\..*","",ReactomaPAData[,1]),
                              column = "ENTREZID", keytype = "SYMBOL")

## 'select()' returned 1:1 mapping between keys and columns

ReactomaPAData <- na.omit(ReactomaPAData)
ReactomaPAData <- ReactomaPAData %>% filter(ReactomaPAData$Gene != "NANA")
ReactomaPAData <-unname(unlist(ReactomaPAData[2]))
x <- enrichPathway(gene=ReactomaPAData,pvalueCutoff=0.05, readable=T)
x1 <- x$Description
x1

## [1] "RHO GTPase cycle"                 "Signaling by TGFB family members"
## [3] "Signaling by BMP"                 "Netrin-1 signaling"

##To look at any of these pathways in dot plot format we would use the following format:
#x1 <- x1[c(1,2,3,4,5,6,7,8,9,10)]
#dotplot(x, showCategory=x1)

#CHEA Transcription Factor Analysis https://alexslemonade.github.io/refinebio-examples/03-rnaseq/pathway-analysis_rnaseq_02_gsea.html https://maayanlab.cloud/Harmonizome/dataset/CHEA+Transcription+Factor+Targets

library(DOSE)

## DOSE v3.24.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609

library("enrichplot")
library("clusterProfiler")
library("fgsea")
library("tibble")
library("msigdbr")
library(reshape2)
library(ComplexHeatmap)

## Loading required package: grid

## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================

library(edgeR)

## Loading required package: limma

## 
## Attaching package: 'limma'

## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

## 
## Attaching package: 'edgeR'

## The following object is masked from 'package:SingleCellExperiment':
## 
##     cpm

TranscriptionFactors <-  read.gmt("gene_set_library_crisp.gmt")
#D7
ReactomaPAData = ivoD7_1 %>% filter(ivoD7_1$avg_log2FC > 0)
dup_gene_symbols <- ReactomaPAData %>%
  dplyr::filter(duplicated(gene)) %>%
  dplyr::pull(gene)
ReactomaPAData %>%
  dplyr::filter(gene %in% dup_gene_symbols) %>%
  dplyr::arrange(gene)

filtered_dge_mapped_df <- ReactomaPAData %>%
  dplyr::arrange(dplyr::desc(abs(avg_log2FC))) %>%
  dplyr::distinct(gene, .keep_all = TRUE)
lfc_vector <- filtered_dge_mapped_df$avg_log2FC
names(lfc_vector) <- filtered_dge_mapped_df$gene
lfc_vector <- sort(lfc_vector, decreasing = TRUE)
head(lfc_vector)

## AC007319.1 AC125613.1 AC007098.1 AC104827.1 AL354771.1 AL353784.1 
##   1.894221   1.717035   1.333127   1.179856   1.156296   1.127319

set.seed(2020)
gsea_results <- GSEA(lfc_vector, seed = TRUE, pAdjustMethod = "BH", pvalueCutoff = 0.05, TERM2GENE = TranscriptionFactors)

## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (3.94% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 6 pathways for which P-values were not calculated properly
## due to unbalanced (positive and negative) gene-level statistic values. For such
## pathways pval, padj, NES, log2err are set to NA. You can try to increase the
## value of the argument nPermSimple (for example set it nPermSimple = 10000)

## no term enriched under specific pvalueCutoff...

d <- data.frame(ID=1:nrow(gsea_results@result), Description =1:nrow(gsea_results@result), NES=1:nrow(gsea_results@result), pvalue=1:nrow(gsea_results@result))
d[,2]<- gsea_results$Description
d[,3]<- gsea_results$NES
d[,4] <- gsea_results$pvalue
d[,5] <- gsea_results$p.adjust
d <- d[order(d$pvalue), ] 
d

ReactomaPAData = ivoD7_1 %>% filter(ivoD7_1$avg_log2FC < 0)
dup_gene_symbols <- ReactomaPAData %>%
  dplyr::filter(duplicated(gene)) %>%
  dplyr::pull(gene)
ReactomaPAData %>%
  dplyr::filter(gene %in% dup_gene_symbols) %>%
  dplyr::arrange(gene)

filtered_dge_mapped_df <- ReactomaPAData %>%
  dplyr::arrange(dplyr::desc(abs(avg_log2FC))) %>%
  dplyr::distinct(gene, .keep_all = TRUE)
lfc_vector <- filtered_dge_mapped_df$avg_log2FC
names(lfc_vector) <- filtered_dge_mapped_df$gene
lfc_vector <- sort(lfc_vector, decreasing = TRUE)
head(lfc_vector)

##     THSD7A       IST1      DDX3X     DDX19A      MLLT6       GOT2 
## -0.1000576 -0.1000801 -0.1001088 -0.1001417 -0.1002170 -0.1002598

set.seed(2020)
gsea_results<-GSEA(lfc_vector, pvalueCutoff = 0.05, seed = TRUE, pAdjustMethod = "BH",TERM2GENE = TranscriptionFactors)

## preparing geneSet collections...
## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (4.69% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 4 pathways for which P-values were not calculated properly
## due to unbalanced (positive and negative) gene-level statistic values. For such
## pathways pval, padj, NES, log2err are set to NA. You can try to increase the
## value of the argument nPermSimple (for example set it nPermSimple = 10000)

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

## leading edge analysis...

## done...

e <- data.frame(ID=1:nrow(gsea_results@result), Description =1:nrow(gsea_results@result), NES=1:nrow(gsea_results@result), pvalue=1:nrow(gsea_results@result))
e[,2]<- gsea_results$Description
e[,3]<- gsea_results$NES
e[,4] <- gsea_results$pvalue
e[,5] <- gsea_results$p.adjust
e <- e[order(e$pvalue), ] 
e

#D10
ReactomaPAData = ivoD10_1 %>% filter(ivoD10_1$avg_log2FC > 0)
dup_gene_symbols <- ReactomaPAData %>%
  dplyr::filter(duplicated(gene)) %>%
  dplyr::pull(gene)
ReactomaPAData %>%
  dplyr::filter(gene %in% dup_gene_symbols) %>%
  dplyr::arrange(gene)

filtered_dge_mapped_df <- ReactomaPAData %>%
  dplyr::arrange(dplyr::desc(abs(avg_log2FC))) %>%
  dplyr::distinct(gene, .keep_all = TRUE)
lfc_vector <- filtered_dge_mapped_df$avg_log2FC
names(lfc_vector) <- filtered_dge_mapped_df$gene
lfc_vector <- sort(lfc_vector, decreasing = TRUE)
head(lfc_vector)

##     CHI3L1 AC007319.1      NAMPT AC125613.1      PCDH9      NEAT1 
##  2.1285814  1.3344750  1.1260077  0.9343168  0.9301437  0.8966260

set.seed(2020)
gsea_results<-GSEA(lfc_vector, pvalueCutoff = 0.05, seed = TRUE, pAdjustMethod = "BH",TERM2GENE = TranscriptionFactors)

## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (1.3% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : All values in the stats vector are greater than zero and scoreType
## is "std", maybe you should switch to scoreType = "pos".

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated properly
## due to unbalanced (positive and negative) gene-level statistic values. For such
## pathways pval, padj, NES, log2err are set to NA. You can try to increase the
## value of the argument nPermSimple (for example set it nPermSimple = 10000)

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.

## leading edge analysis...

## done...

f <- data.frame(ID=1:nrow(gsea_results@result), Description =1:nrow(gsea_results@result), NES=1:nrow(gsea_results@result), pvalue=1:nrow(gsea_results@result))
f[,2]<- gsea_results$Description
f[,3]<- gsea_results$NES
f[,4] <- gsea_results$pvalue
f[,5] <- gsea_results$p.adjust
f <- f[order(f$pvalue), ] 
f

ReactomaPAData = ivoD10_1 %>% filter(ivoD10_1$avg_log2FC < 0)
dup_gene_symbols <- ReactomaPAData %>%
  dplyr::filter(duplicated(gene)) %>%
  dplyr::pull(gene)
ReactomaPAData %>%
  dplyr::filter(gene %in% dup_gene_symbols) %>%
  dplyr::arrange(gene)

filtered_dge_mapped_df <- ReactomaPAData %>%
  dplyr::arrange(dplyr::desc(abs(avg_log2FC))) %>%
  dplyr::distinct(gene, .keep_all = TRUE)
lfc_vector <- filtered_dge_mapped_df$avg_log2FC
names(lfc_vector) <- filtered_dge_mapped_df$gene
lfc_vector <- sort(lfc_vector, decreasing = TRUE)
head(lfc_vector)

##      BRWD1     ZNF540      COPB1    SEPTIN8       ADD2      CUL4B 
## -0.1000803 -0.1001582 -0.1002441 -0.1002441 -0.1002893 -0.1002893

set.seed(2020)
gsea_results<-GSEA(lfc_vector, pvalueCutoff = 0.05, seed = TRUE, pAdjustMethod = "BH",TERM2GENE = TranscriptionFactors)

## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (2.61% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 4 pathways for which P-values were not calculated properly
## due to unbalanced (positive and negative) gene-level statistic values. For such
## pathways pval, padj, NES, log2err are set to NA. You can try to increase the
## value of the argument nPermSimple (for example set it nPermSimple = 10000)

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

## leading edge analysis...

## done...

g <- data.frame(ID=1:nrow(gsea_results@result), Description =1:nrow(gsea_results@result), NES=1:nrow(gsea_results@result), pvalue=1:nrow(gsea_results@result))
g[,2]<- gsea_results$Description
g[,3]<- gsea_results$NES
g[,4] <- gsea_results$pvalue
g[,5] <- gsea_results$p.adjust
g <- g[order(g$pvalue), ] 
g

# First let's create a mapped data frame we can join to the differential
# expression stats
#dge_mapped_df <- data.frame(
#  gene_symbol = mapIds(
#    org.Mm.eg.db,
#    keys = dge_df$Gene,
#    keytype = "ENSEMBL",
#    column = "SYMBOL",
#    multiVals = "first"
#  )) %>%
#  dplyr::filter(!is.na(gene_symbol)) %>%
#  tibble::rownames_to_column("Ensembl") %>%
#  dplyr::inner_join(dge_df, by = c("Ensembl" = "Gene"))