1. Cytokine Stimulated CD362+ Cells
vs. Unstimulated CD362+ Cells
1.1
Differentially Expressed Genes
1.1.1 Up Regulated
1.1.2 Down Regulated
1.2.
Pathway Analysis Cytokine Stimulated CD362+ DEGs
1.2.1 KEGG
1.2.2 Reactome
1.2.3 GO
1.3 Highlighted
Pathways
1.3.1
Apoptosis
1.3.2 Caspases,
Cytochrome c
1.3.3 “Find Me”
Apoptosis Signals
1.3.4
Transmembrane Proteins
1.3.5
Monocyte/Macrophage (Innate Immunity)
2.
Cytokine Stimulated CD362- Cells vs. Unstimulated CD362+ Cells
2.1 Differentially Expressed Genes
2.1.1 Up Regulated
2.1.2 Down Regulated
2.2. Pathway Analysis Cytokine Stimulated
CD362- DEGs
2.2.1 KEGG
2.2.2 Reactome
2.2.3 GO
2.3 Highlighted Pathways
2.3.1 Apoptosis
2.3.2 Caspases, Cytochrome c
2.3.3 “Find Me” Apoptosis Signals
2.3.4 Transmembrane Proteins
2.3.5 Monocyte/Macrophage (Innate Immunity)
3. Comparing Highlighted Pathways
3.1 Apoptosis
3.2
Caspases, Cytochrome c
3.3 “Find Me”
Apoptosis Signals
3.4 Transmembrane
Proteins
3.5 Monocyte/Macrophage
(Innate Immunity)
4. Private Gene
Signatures
4.1 Cytokine Stimulated
CD362+ Private Genes
4.2 Pathway
Analysis Cytokine Stimulated CD362+ Private DEGs
4.2.1 KEGG
4.2.2 Reactome
4.2.3 GO
4.3 Cytokine
Stimulated CD362+ Private Genes
4.4
Pathway Analysis Cytokine Stimulated CD362- Private DEGs
4.4.1 KEGG
4.4.2 Reactome
4.4.3 GO
library(knitr)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(dplyr)
library(biomaRt)
library(tximport)
library(pheatmap)
library(DT)
library(ggplot2)
library(DESeq2)
library(limma)
library(edgeR)
library(ggpubr)
library(pathfindR)
library(viridis)
library(GSVA)
library(sva)dir <- ("/data/github/Miltenyi_Biotec/new_Kallisto_Quant/")
samples <- read.table(file.path(dir, "metadata.csv"),
sep=",", header=T, row.names="row")
files <- file.path(dir, rownames(samples), "abundance.h5")
names(files) <- paste0(rownames(samples))
mart <- biomaRt::useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
results <- biomaRt::getBM(attributes = c("ensembl_transcript_id_version", "hgnc_symbol"), mart = mart)
tx2gene <- results[, 1:2]
txi <- tximport::tximport(files, type = "kallisto", tx2gene = tx2gene)
library(limma)
samples$group = as.factor(paste(samples$condition,samples$cell, sep="_"))
design = model.matrix( ~ 0 + group + patient, data=samples)
y <- edgeR::DGEList(txi$counts)
keep <- edgeR::filterByExpr(y, design)
y <- y[keep, ]
# normalize and run voom transformation
y <- edgeR::calcNormFactors(y)
logcpm <- edgeR::cpm(y, normalized.lib.sizes = T, log=TRUE)
scaled <- t(scale(t(logcpm)))
v <- limma::voom(y, design, plot = F)
## dds for combat seq later?
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ patient + group)
dds <- DESeq(dds)
# small prefilter
keep <- rowSums(counts(dds) >= 20) >= 4
dds <- dds[keep,]
#combat seq
# 1. use raw integer counts
# 2. normalise and transform after correction.
mm <- model.matrix(~group, data=samples)
# takes a long time, write to file
comb = sva::ComBat_seq(assay(dds), batch=dds$patient, group = dds$group)## Found 4 batches
## Using full model in ComBat-seq.
## Adjusting for 7 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
dds = DESeqDataSetFromMatrix(comb, colData = samples, design = ~ group )
dds = DESeq(dds)
vsd = vst(dds, blind=F)
comb = assay(vsd)
#write.table(comb, "/data/ORBSEN_PATHFINDR/combat_seq.txt", row.names = T, sep="\t", quote=F)
#comb = read.csv("/data/ORBSEN_PATHFINDR/combat_seq.txt", row.names = 1, header=T, sep="\t")
# limma removeBatch
# not as effective!
#vsd <- vst(dds, blind=FALSE)
#mat <- assay(vsd)
#mm <- model.matrix(~group, data=samples)
#mat <- limma::removeBatchEffect(mat, batch=vsd$patient, design=mm)
#for pca plots, subsetting things.
plot1 = read.csv("/data/ORBSEN_PATHFINDR/plot1.txt", header=T, row.names = 1, sep="\t")
plot2 = read.csv("/data/ORBSEN_PATHFINDR/plot2.txt", header=T, row.names = 1, sep="\t")Differentially expressed genes were detected using the
limma::voom() package with the following parameters:
~ 0 + patient + group where group is a
combination of the cell type (CD362+) and condition
(control/cytokine) denoting if the cells were stimulated.
The effects of patient variability are controlled for in the model
design.<=0.05.contrast <- limma::makeContrasts(
CD362_pos = groupcytokine_ARIA_CD362 - groupcontrol_ARIA_CD362,
CD362_neg = groupcytokine_ARIA - groupcontrol_ARIA,
levels = colnames(design))
fit = limma::lmFit(v, design = design)
fit <- limma::eBayes(limma::contrasts.fit(limma::lmFit(v, design = design), contrast))
CD362_pos = limma::topTable(fit, number=Inf, p.value = 0.05, adjust.method = "BH", coef="CD362_pos")
CD362_pos = tibble::rownames_to_column(CD362_pos, "Gene")
CD362_neg = limma::topTable(fit, number=Inf, p.value = 0.05, adjust.method = "BH", coef="CD362_neg")
CD362_neg = tibble::rownames_to_column(CD362_neg, "Gene")
info = biomaRt::getBM(attributes=c("entrezgene_description", "hgnc_symbol"),
filters="hgnc_symbol",
values=CD362_pos$Gene,
mart=mart,
useCache=T)
CD362_pos = merge(CD362_pos, info, by.x="Gene", by.y="hgnc_symbol")
info = biomaRt::getBM(attributes=c("entrezgene_description", "hgnc_symbol"),
filters="hgnc_symbol",
values=CD362_neg$Gene,
mart=mart,
useCache=T)
CD362_neg = merge(CD362_neg, info, by.x="Gene", by.y="hgnc_symbol")CD362_pos = subset(CD362_pos, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
pos = CD362_pos[which(CD362_pos$logFC>0),]
pos = pos[order(pos$logFC, decreasing=T),]
pos$logFC = round(pos$logFC, 3)
DT::datatable(pos, rownames = FALSE, options=list(scrollX=T))Enrichment Analysis was performed using the
pathfindR::run_pathfindR() package. Differentially
expressed genes reported in CD362+ cytokine stimulated cells with
abs(logFC>1.5) were used as input against the
KEGG, Reactome and
GO Biological Processes (BP), Molecular Function (MF) and Cellular Components (CC)
databases.
100 iterations were run for each analysis - pathways considered statistically significant in >50 iterations are reported in the results below:
Fold Enrichment > 1 = over represetation, < 1 = under representation
keggpathway1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_kegg.txt", header=T, sep="\t")
reactome1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_reactome.txt", header=T, sep="\t")
go1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_go-all.txt", header=T, sep="\t")keggpathway1 = keggpathway1[which(keggpathway1$occurrence>50), ]
DT::datatable(keggpathway1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))reactome1 = reactome1[which(reactome1$occurrence >50), ]
DT::datatable(reactome1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))Per request, specific pathways were investigated for over representation in CD362+ cytokine stimulated cells vs. unstimulated CD362+ cells:
Pathways have been clustered by similarity of biological relevance to aid visualisation.
Please note that the pathways selected below may not be representative of all pathways involved in for example, Apoptosis or Caspase pathways. Those with more domain knowledge may wish to utilize the search function in the complete list of pathways returned by KEGG, Reactome and Go in Section 1.2. Contact me if you want specific pathways included in plots.
test = rbind(keggpathway1, reactome1, go1)
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)Results from KEGG, Reactome and
GO were merged and searched for terms with “apoptosis”
present in them.
save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(y=Term_Description,x=Fold_Enrichment)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Apoptosis Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “caspase” and
“cytochrome” present in them.
search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Caspase/Cytochrome c Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “ATP”,
“phosphatase”, “nucleotide”, “sphingolipid”, and “lysomal” present in
them.
search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ 'Find Me' Signal Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for pathways containing genes
from the TMEM family.
search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Transmembrane Protein Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “Innate”,
“neutrohil”, and “effector” present in them.
search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("SARS", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Cytokine/Macrophage Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))Differentially expressed genes were detected using the
limma::voom() package with the following parameters:
~ 0 + patient + group where group is a
combination of the cell type (CD362-) and condition
(control/cytokine) denoting if the cells were stimulated.
The effects of patient variability are controlled for in the model
design.<=0.05.CD362_neg = subset(CD362_neg, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
pos = CD362_neg[which(CD362_neg$logFC>0),]
pos = pos[order(pos$logFC, decreasing=T),]
pos$logFC = round(pos$logFC, 3)
DT::datatable(pos, rownames = FALSE, options=list(scrollX=T))keggpathway2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_kegg.txt", header=T, sep="\t")
reactome2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_reactome.txt", header=T, sep="\t")
go2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_go-all.txt", header=T, sep="\t")DT::datatable(keggpathway2[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))DT::datatable(reactome2[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))Per request, specific pathways were investigated for over representation in CD362- cytokine stimulated cells vs. unstimulated CD362- cells:
Pathways have been clustered by similarity of biological relevance to aid visualisation.
Please note that the pathways selected below may not be representative of all pathways involved in for example, Apoptosis or Caspase pathways. Those with more domain knowledge may wish to utilize the search function in the complete list of pathways returned by KEGG, Reactome and Go in Section 2.2. Contact me if you want specific pathways included in plots.
test = rbind(keggpathway2, reactome2, go2)
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)Results from KEGG, Reactome and
GO were merged and searched for terms with “apoptosis”
present in them.
save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Apoptosis Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “caspase” and
“cytochrome” present in them.
search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Caspase/Cytochrome c Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “ATP”,
“phosphatase”, “nucleotide”, “sphingolipid”, and “lysomal” present in
them.
search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ 'Find Me' Signal Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for pathways containing genes
from the TMEM family.
search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Transmembrane Protein Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))Results from KEGG, Reactome and
GO were merged and searched for terms with “Innate”,
“neutrohil”, and “effector” present in them.
search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)save = c()
for(i in 1:nrow(clst_search1)){
row = as.data.frame(clst_search1[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("SARS", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=lowest_p,size=size)) +
scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "CD362+ Cytokine/Macrophage Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='P-value',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))The highlighted pathways in Section 1.3 and Section 2.3 were selected for direct comparison between CD362+ and CD362- cells using Gene Set Variation Analysis (GSVA). Briefly, GSVA assesses the relative enrichment of gene sets across samples using a non-parametric approach. Conceptually, GSVA transforms a gene by sample expression matrix into a geneset by sample pathway enrichment matrix. This facilitates many forms of statistical analysis in the ‘space’ of pathways rather than genes, providing a higher level of interpretability.
The input gene expression matrix to GSVA was batch
corrected using sva::Combat-Seq to regress out the effect
of patient variability prior to normalisation and transformation using
DESeq2::vst().
The results of GSVA are clustered and visualised using heatmaps to
identify patterns of over/under enrichment in CD362+ or CD362- cells. A
formal t.test is conducted between CD362+ and CD362- groups
and reported in the tables below.
N.B: Prior to running GSVA, msigdbr() is used to
download and format gene sets from KEGG, Reactome and GO. Unfortunately
when extracting pathways from msigdbr() that are deemed
significant by pathfindR using the pathway ID (e.g hsa04210), not every
pathway is returned resulting in a small loss of information.
test = rbind(keggpathway1, reactome1, go1)
test2 = rbind(keggpathway2, reactome2, go2)
# ORBCEL undergo extrinsic (FasL) and intrinsic (Bcl2) mediated apoptosis/anoikis within 2/3 hrs of infusion
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]
search2 = dplyr::filter(test2, grepl('apop|Apop', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]
clst_search2 = clst_search2[!clst_search2$ID=="R-HSA-109581",]sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
my_sets$gene_symbol, # The genes we want split into pathways
my_sets$gs_name # The pathways made as the higher levels of the list
)
cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)
rownames(var) = sub("^[^_]*_", "", rownames(var))
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
var = t(scale(t(var), center = T))
pheatmap(var,
cluster_rows = T,
cluster_cols = T,
scale = "none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))Apoptosis,
TNFR1 Induced Proapoptotic Signalling,
Apoptotic Execution Phase,
Intrinsic Pathway For Apoptosis,
Caspase Activation Via Extrinsic Apoptotic Signalling Pathway
and Extrinsic Apoptotic Signalling Pathway have higher
enrichment scores in CD362+ cells than CD362- cells, however none of the
differences in means are statistically significant:pathway = c()
pval = c()
pos = c()
neg = c()
for(i in 1:nrow(var)){
name = rownames(var)[i]
pathway = c(pathway, name)
row = as.data.frame(var[i,])
t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
pval = c(pval, t$p.value)
pos = c(pos, as.numeric(t$estimate[1]))
neg = c(neg, as.numeric(t$estimate[2]))
}
adj = p.adjust(pval, method = "BH")
result = data.frame(row.names = pathway,
mean_CD362_positive = pos,
mean_CD362_negative = neg,
p.value = pval,
adj.p.val = adj)
result = result[order(result$mean_CD362_positive, decreasing = T),]
DT::datatable(result, rownames = T, options=list(scrollX=T))(logFC > 1.5 & Adj.P.Value <= 0.05) we can
conclude that Cytokine stimulated CD362+ cells have more significant
differentially expressed genes involved in Apoptosis associated pathways
than Cytokine stimulated CD362- cells:plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]
save = c()
for(i in 1:nrow(plota)){
row = as.data.frame(plota[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plota$size = save
save = c()
for(i in 1:nrow(plotb)){
row = as.data.frame(plotb[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plotb$size = save
plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"
plot = rbind(plota, plotb)
ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
scale_color_manual(values = c("CD362+" = "forestgreen",
"CD362-"="purple")) +
#scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "Apoptosis Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='Cell Type',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)View CD362+ Apoptosis Associated Gene Sets
View CD362- Apoptosis Associated Gene Sets
Return to Table of Contents
genes = my_sets$KEGG_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_APOPTOTIC_EXECUTION_PHASE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_ACTIVATION_OF_CYSTEINE_TYPE_ENDOPEPTIDASE_ACTIVITY_INVOLVED_IN_APOPTOTIC_PROCESS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_DEFECTIVE_INTRINSIC_PATHWAY_FOR_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_EXTRINSIC_APOPTOTIC_SIGNALING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_INTRINSIC_APOPTOTIC_SIGNALING_PATHWAY_IN_RESPONSE_TO_DNA_DAMAGE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_TNFR1_INDUCED_PROAPOPTOTIC_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))Elastic net logistic regression was performed on genes involved in the apoptotic pathways listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:
genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]
mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.3, lambda = NULL)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
sig_df = sig_df[2:nrow(sig_df),]
mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
mat = t(scale(t(mat), center = T, scale = T))
pheatmap(mat,
cluster_rows = T,
cluster_cols = T,
scale = "none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))The heatmap displaying GSVA enrichment scores for
Caspase/Cytochrome C pathways shows good sample separation between
stimulated CD362+ vs. stimulated CD362- cells, however the results are
contradictory: (1) Up-regulation of
Negative regulation of release of cytochrome c from mitochondria
suggests that cytochrome c release is reduced in stimulated CD362+ cells
however, (2) Up-regulation of caspase activation via death receptors and
extrinsic apoptotic signalling show that stimulated CD362+ cells
suggests apoptotic activity is upregulated.
It is worth while investigating the differentially expressed genes listed in CD362+ Caspase/Cytochrome c Associated Gene Sets.
Of note, the pathway
Negative regulation of release of cytochrome c from mitochondria
contains only 17 genes in its set (of which three are differentially
expressed). The small size of this gene set is the likely cause of it
being labelled as a significantly enriched pathway. Take these results
with a pinch of salt.
search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
search2 = dplyr::filter(test2, grepl('casp|Casp|cytochr', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
my_sets$gene_symbol, # The genes we want split into pathways
my_sets$gs_name # The pathways made as the higher levels of the list
)
cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Poisson", min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)
rownames(var) = sub("^[^_]*_", "", rownames(var))
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
var = t(scale(t(var), center = T))
pheatmap(var,
cluster_rows = T,
cluster_cols = T,
scale="none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))pathway = c()
pval = c()
pos = c()
neg = c()
for(i in 1:nrow(var)){
name = rownames(var)[i]
pathway = c(pathway, name)
row = as.data.frame(var[i,])
t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
pval = c(pval, t$p.value)
pos = c(pos, as.numeric(t$estimate[1]))
neg = c(neg, as.numeric(t$estimate[2]))
}
adj = p.adjust(pval, method = "BH")
result = data.frame(row.names = pathway,
mean_CD362_positive = pos,
mean_CD362_negative = neg,
p.value = pval,
adj.p.val = adj)
DT::datatable(result, rownames = T, options=list(scrollX=T))The fold enrichment scores for
Negative regulation of release of cytochrome c from mitochondria
are nearly identical for stimulated CD362+ and stimulated CD362- cells.
Referring to my previous point about the background gene set size, this
pathway is somewhat of a red herring.
Encouragingly, stimulated CD362+ cells exhibit higher fold enrichment changes in pathways pertaining to capsase activation compared to stimulated CD362- cells.
plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]
save = c()
for(i in 1:nrow(plota)){
row = as.data.frame(plota[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plota$size = save
save = c()
for(i in 1:nrow(plotb)){
row = as.data.frame(plotb[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plotb$size = save
plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"
plot = rbind(plota, plotb)
ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
scale_color_manual(values = c("CD362+" = "forestgreen",
"CD362-"="purple")) +
#scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "Caspase/Cytochrome C Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='Cell Type',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)View CD362+ Caspase/Cytochrome c Associated Gene
Sets
View CD362- Caspase/Cytochrome c
Associated Gene Sets
Return to Table of
Contents
Of note, the
Negative regulation of release of cytochrome c from mitochondria
pathway shows no structure when plotting the normalised gene expression
of all genes in the pathway. This corroborates with my previous points
and we can build the case that this pathway is noise.
Stimulated CD362+ cells cluster well in the heatmap when plotting the normalised gene expression for genes involved in capase activation. Using logistic regression in the section below we can pull out the predictive features that are driving this clustering pattern.
genes = my_sets$GOBP_NEGATIVE_REGULATION_OF_RELEASE_OF_CYTOCHROME_C_FROM_MITOCHONDRIA
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_DEATH_RECEPTORS_IN_THE_PRESENCE_OF_LIGAND
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))Elastic net logistic regression was performed on genes involved in the Caspase, Cytochrome C pathways listed above was used to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:
genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]
mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
sig_df = sig_df[2:nrow(sig_df),]
mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
mat = t(scale(t(mat), center = T, scale = T))
pheatmap(mat,
cluster_rows = T,
cluster_cols = T,
scale="column",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
search2 = dplyr::filter(test2, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]
clst_search2 = clst_search2[!grepl("repair|Repair|DNA", clst_search2$Term_Description), ]sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
my_sets$gene_symbol, # The genes we want split into pathways
my_sets$gs_name # The pathways made as the higher levels of the list
)
cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)
rownames(var) = sub("^[^_]*_", "", rownames(var))
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
var = scale(t(var), center = T)
var = t(var)
pheatmap(var,
cluster_rows = T,
cluster_cols = T,
scale="none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))pathway = c()
pval = c()
pos = c()
neg = c()
for(i in 1:nrow(var)){
name = rownames(var)[i]
pathway = c(pathway, name)
row = as.data.frame(var[i,])
t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
pval = c(pval, t$p.value)
pos = c(pos, as.numeric(t$estimate[1]))
neg = c(neg, as.numeric(t$estimate[2]))
}
adj = p.adjust(pval, method = "BH")
result = data.frame(row.names = pathway,
mean_CD362_positive = pos,
mean_CD362_negative = neg,
p.value = pval,
adj.p.val = adj)
DT::datatable(result, rownames = T, options=list(scrollX=T))plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]
save = c()
for(i in 1:nrow(plota)){
row = as.data.frame(plota[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plota$size = save
save = c()
for(i in 1:nrow(plotb)){
row = as.data.frame(plotb[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plotb$size = save
plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"
plot = rbind(plota, plotb)
ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
scale_color_manual(values = c("CD362+" = "forestgreen",
"CD362-"="purple")) +
#scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "'Find Me' Apoptosis Signals",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='Cell Type',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)View CD362+ ‘Find Me’ Apoptosis Signal Associated
Gene Sets
View CD362- ‘Find Me’ Apoptosis
Signal Associated Gene Sets
Return to Table of
Contents
genes = my_sets$GOBP_POSITIVE_REGULATION_OF_PHOSPHATIDYLINOSITOL_3_KINASE_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOCC_AUTOLYSOSOME
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOMF_PHOSPHATIDYLINOSITOL_3_4_5_TRISPHOSPHATE_BINDING
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOMF_PHOSPHATIDYLINOSITOL_3_KINASE_BINDING
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOMF_PROTEIN_TYROSINE_PHOSPHATASE_ACTIVITY
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GLOBO_SERIES
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_METABOLISM_OF_NUCLEOTIDES
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_NUCLEOTIDE_BINDING_DOMAIN_LEUCINE_RICH_REPEAT_CONTAINING_RECEPTOR_NLR_SIGNALING_PATHWAYS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_NUCLEOTIDE_BIOSYNTHESIS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_NUCLEOTIDE_SALVAGE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_MULTIPLE_NUCLEOTIDE_PATCH_REPLACEMENT_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))No predictive features were found using elastic net logistic regression, i.e no coefficients retunred by model..
search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
search2 = dplyr::filter(test2, grepl('TMEM', Up_regulated))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
my_sets$gene_symbol, # The genes we want split into pathways
my_sets$gs_name # The pathways made as the higher levels of the list
)
cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)
rownames(var) = sub("^[^_]*_", "", rownames(var))
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
var = t(scale(t(var), scale=T, center = T))
pheatmap(var,
cluster_rows = T,
cluster_cols = T,
scale="none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))pathway = c()
pval = c()
pos = c()
neg = c()
for(i in 1:nrow(var)){
name = rownames(var)[i]
pathway = c(pathway, name)
row = as.data.frame(var[i,])
t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
pval = c(pval, t$p.value)
pos = c(pos, as.numeric(t$estimate[1]))
neg = c(neg, as.numeric(t$estimate[2]))
}
adj = p.adjust(pval, method = "BH")
result = data.frame(row.names = pathway,
mean_CD362_positive = pos,
mean_CD362_negative = neg,
p.value = pval,
adj.p.val = adj)
DT::datatable(result, rownames = T, options=list(scrollX=T))plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]
save = c()
for(i in 1:nrow(plota)){
row = as.data.frame(plota[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plota$size = save
save = c()
for(i in 1:nrow(plotb)){
row = as.data.frame(plotb[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plotb$size = save
plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"
plot = rbind(plota, plotb)
ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
scale_color_manual(values = c("CD362+" = "forestgreen",
"CD362-"="purple")) +
#scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "Transmembrane Associated Pathways",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='Cell Type',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)View CD362+ Transmembrane Associated Gene
Sets
View CD362- Transmembrane Associated Gene
Sets
Return to Table of Contents
genes = my_sets$GOBP_POSITIVE_REGULATION_OF_I_KAPPAB_KINASE_NF_KAPPAB_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_1_BETA_PRODUCTION
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_6_PRODUCTION
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOCC_CENTRIOLAR_SATELLITE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOCC_ENDOPLASMIC_RETICULUM_LUMEN
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOCC_SPECIFIC_GRANULE_MEMBRANE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOCC_TRANS_GOLGI_NETWORK_MEMBRANE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_REGULATION_OF_INSULIN_LIKE_GROWTH_FACTOR_IGF_TRANSPORT_AND_UPTAKE_BY_INSULIN_LIKE_GROWTH_FACTOR_BINDING_PROTEINS_IGFBPS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))Elastic net logistic regression was performed on pathways with “TMEM” genes listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:
genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]
mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
sig_df = sig_df[2:nrow(sig_df),]
mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
mat = t(scale(t(mat), center = T, scale = T))
pheatmap(mat,
cluster_rows = T,
cluster_cols = T,
scale="column",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
search2 = dplyr::filter(test2, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
my_sets$gene_symbol, # The genes we want split into pathways
my_sets$gs_name # The pathways made as the higher levels of the list
)
cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)
rownames(var) = sub("^[^_]*_", "", rownames(var))
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
var = t(scale(t(var), center = T, scale = T))
pheatmap(var,
cluster_rows = T,
cluster_cols = T,
scale="none",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))negative regulation of innate immune respose to be higher
in stimulated CD362+ cells, in conjunction with
activation of innate immune response. It is worthwhile
viewing the differentially expressed genes involved in these pathways to
draw conclusions from this - View CD362+
Monocyte/Macrophage (Innate Pathway) Associated Gene Sets.pathway = c()
pval = c()
pos = c()
neg = c()
for(i in 1:nrow(var)){
name = rownames(var)[i]
pathway = c(pathway, name)
row = as.data.frame(var[i,])
t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
pval = c(pval, t$p.value)
pos = c(pos, as.numeric(t$estimate[1]))
neg = c(neg, as.numeric(t$estimate[2]))
}
adj = p.adjust(pval, method = "BH")
result = data.frame(row.names = pathway,
mean_CD362_positive = pos,
mean_CD362_negative = neg,
p.value = pval,
adj.p.val = adj)
result = result[order(result$mean_CD362_positive, decreasing = T),]
DT::datatable(result, rownames = T, options=list(scrollX=T))plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]
save = c()
for(i in 1:nrow(plota)){
row = as.data.frame(plota[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plota$size = save
save = c()
for(i in 1:nrow(plotb)){
row = as.data.frame(plotb[i,])
size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
save = c(save, size)
}
plotb$size = save
plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"
plot = rbind(plota, plotb)
ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
scale_color_manual(values = c("CD362+" = "forestgreen",
"CD362-"="purple")) +
#scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
labs( title = "Monocyte/Macrophage (Innate Pathway)",
subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
x='Fold Enrichment', y=NULL,
color='Cell Type',size='Gene\nnumber'
) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size=12),
axis.text.y.left = element_text(size = 12),
axis.title.x.bottom = element_text(size=10)
)View CD362+ Monocyte/Macrophage (Innate Pathway)
Associated Gene Sets
View CD362-
Monocyte/Macrophage (Innate Pathway) Associated Gene Sets
Return to Table of Contents
genes = my_sets$GOBP_ACTIVATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_NEGATIVE_REGULATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$GOBP_REGULATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_REGULATION_OF_INNATE_IMMUNE_RESPONSES_TO_CYTOSOLIC_DNA
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))genes = my_sets$REACTOME_RHO_GTPASE_EFFECTORS
mat = comb[which(rownames(comb)%in% genes),]
mat = t(scale(t(mat), center = T, scale=T))
pheatmap(t(mat[,c(control,cases)]), cluster_rows = T, cluster_cols = T, scale="none", annotation_row = ann_col, angle_col = 45, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))Elastic net logistic regression was performed on Monocyte/Macrophage pathway genes listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:
genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]
mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
sig_df = sig_df[2:nrow(sig_df),]
mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names
ann_col = data.frame(row.names = colnames(var),
Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
col <- c("forestgreen", "purple")
names(col) <- c("CD362+ ", "CD362- ")
ann_clr <- list(Cell = col)
mat = t(scale(t(mat), center = T, scale = T))
pheatmap(mat,
cluster_rows = T,
cluster_cols = T,
scale="column",
annotation_col=ann_col,
annotation_colors = ann_clr,
color = hcl.colors(100, "RdBu",rev=T))Gene signatures private to CD362+ and CD362- cells were identified and used as input for Enrichment Analysis using pathfindR.
results <- decideTests(fit, adjust.method = "BH", p.value = 0.05, lfc=0)
a <- vennCounts(results)
vennDiagram(results,
names=c("CD362+", "CD362-"),
include=c("both"),
counts.col=c("red", "blue"),
circle.col = c("red", "blue", "green3"))alls <- affycoretools:::makeIndices(results[,1:2])
alls.genes <- lapply(alls, function(x) row.names(results[,1:2])[x])
pos_private = CD362_pos[which(CD362_pos$Gene %in% alls.genes[[1]]),]up = pos_private[which(pos_private$logFC>0),]
DT::datatable(up, rownames = F, options = list(scrollX=T))down = pos_private[which(pos_private$logFC<0),]
DT::datatable(down, rownames = F, options = list(scrollX=T))posprikegg = read.csv("/data/ORBSEN_PATHFINDR/posprikegg.txt", header=T, sep="\t")
posprireactome = read.csv("/data/ORBSEN_PATHFINDR/posprireactome.txt", header=T, sep="\t")
posprigo = read.csv("/data/ORBSEN_PATHFINDR/posprigo.txt", header=T, sep="\t")DT::datatable(posprikegg[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))DT::datatable(posprireactome[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))DT::datatable(posprigo[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))# posprikegg = read.csv("/data/ORBSEN_PATHFINDR/posprikegg.txt", header=T, sep="\t")
# posprireactome = read.csv("/data/ORBSEN_PATHFINDR/posprireactome.txt", header=T, sep="\t")
# posprigo = read.csv("/data/ORBSEN_PATHFINDR/posprigo.txt", header=T, sep="\t")
#
# test = rbind(posprikegg, posprireactome, posprigo)
#
# sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
# sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
# sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
# sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
# sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
# master = rbind(sets, sets1, sets2, sets3, sets4)
# # use all sets returned by private CD362 genes
# my_sets = master[which(master$gs_exact_source %in% test$ID),]
# ids = unique(my_sets$gs_exact_source)
# my_sets = split(
# my_sets$gene_symbol, # The genes we want split into pathways
# my_sets$gs_name # The pathways made as the higher levels of the list
# )
#
# cases = rownames(plot1)[seq(2,nrow(plot1),2)]
# control = rownames(plot2)[seq(2,nrow(plot2),2)]
# var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = Inf, mx.diff = TRUE, verbose = F)
# rownames(var) = sub("^[^_]*_", "", rownames(var))
#
#
# # run GLMNET on it
# var = as.data.frame(t(var))
# var$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
# model = glmnet::cv.glmnet(as.matrix(var[,c(1:ncol(var)-1)]), var$Group, family = "binomial", alpha = 0.3)
# sig_feat = coef(model, s="lambda.min")
# sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
# sig_df = sig_df[2:nrow(sig_df),]
#
# var = as.data.frame(t(var))
# var = var[which(rownames(var) %in% sig_df$name),]
# names = rownames(var)
# var = as.data.frame(sapply(var, as.numeric))
# rownames(var) = names
#
# ann_col = data.frame(row.names = colnames(var),
# Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
#
# col <- c("forestgreen", "purple")
# names(col) <- c("CD362+ ", "CD362- ")
# ann_clr <- list(Cell = col)
#
# var = t(scale(t(var), center = T, scale = T))
#
# pheatmap(var,
# cluster_rows = T,
# cluster_cols = T,
# scale="none",
# annotation_col=ann_col,
# annotation_colors = ann_clr,
# color = hcl.colors(100, "RdBu",rev=T))neg_private = CD362_neg[which(CD362_neg$Gene %in% alls.genes[[2]]),]up = neg_private[which(neg_private$logFC>0),]
DT::datatable(up, rownames = F, options = list(scrollX=T))down = neg_private[which(neg_private$logFC<0),]
DT::datatable(down, rownames = F, options = list(scrollX=T))negprikegg = read.csv("/data/ORBSEN_PATHFINDR/negprikegg.txt", header=T, sep="\t")
negprireactome = read.csv("/data/ORBSEN_PATHFINDR/negprireactome.txt", header=T, sep="\t")
negprigo = read.csv("/data/ORBSEN_PATHFINDR/negprigo.txt", header=T, sep="\t")DT::datatable(negprikegg[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))DT::datatable(negprireactome[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))DT::datatable(negprigo[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))# negprikegg = read.csv("/data/ORBSEN_PATHFINDR/negprikegg.txt", header=T, sep="\t")
# negprireactome = read.csv("/data/ORBSEN_PATHFINDR/negprireactome.txt", header=T, sep="\t")
# negprigo = read.csv("/data/ORBSEN_PATHFINDR/negprigo.txt", header=T, sep="\t")
#
# test = rbind(negprikegg, negprireactome, negprigo)
#
# sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
# sets1 <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
# sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
# sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
# sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
# master = rbind(sets, sets1, sets2, sets3, sets4)
# # use all sets returned by private CD362 genes
# my_sets = master[which(master$gs_exact_source %in% test$ID),]
# my_sets = split(
# my_sets$gene_symbol, # The genes we want split into pathways
# my_sets$gs_name # The pathways made as the higher levels of the list
# )
#
# cases = rownames(plot1)[seq(2,nrow(plot1),2)]
# control = rownames(plot2)[seq(2,nrow(plot2),2)]
# var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian", min.sz = 5, max.sz = Inf, mx.diff = TRUE, verbose = FALSE)
# rownames(var) = sub("^[^_]*_", "", rownames(var))
#
#
# # run GLMNET on it
# var = as.data.frame(t(var))
# var$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
# model = glmnet::cv.glmnet(as.matrix(var[,c(1:ncol(var)-1)]), var$Group, family = "binomial", alpha = 0.5)
# sig_feat = coef(model, s="lambda.min")
# sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)
# sig_df = sig_df[2:nrow(sig_df),]
#
# var = as.data.frame(t(var))
# var = var[which(rownames(var) %in% sig_df$name),]
# names = rownames(var)
# var = as.data.frame(sapply(var, as.numeric))
# rownames(var) = names
#
# ann_col = data.frame(row.names = colnames(var),
# Cell = c(rep("CD362+ ",4),rep("CD362- ",4)))
#
# col <- c("forestgreen", "purple")
# names(col) <- c("CD362+ ", "CD362- ")
# ann_clr <- list(Cell = col)
#
# pheatmap(var,
# cluster_rows = T,
# cluster_cols = T,
# annotation_col=ann_col,
# annotation_colors = ann_clr,
# color = hcl.colors(100, "RdBu",rev=T))