Results

library(knitr)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(dplyr)
library(biomaRt)
library(tximport)
library(pheatmap)
library(DT)
library(ggplot2)
library(DESeq2)
library(limma)
library(edgeR)
library(ggpubr)
library(pathfindR)
library(viridis)
library(GSVA)
library(sva)

dir <- ("/data/github/Miltenyi_Biotec/new_Kallisto_Quant/")
samples <- read.table(file.path(dir, "metadata.csv"), 
                        sep=",", header=T, row.names="row")
                        
files <- file.path(dir, rownames(samples), "abundance.h5")
names(files) <- paste0(rownames(samples))

mart <- biomaRt::useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
results <- biomaRt::getBM(attributes = c("ensembl_transcript_id_version", "hgnc_symbol"), mart = mart)
tx2gene <- results[, 1:2]
txi <- tximport::tximport(files, type = "kallisto", tx2gene = tx2gene)

library(limma)
samples$group = as.factor(paste(samples$condition,samples$cell, sep="_"))

design = model.matrix( ~ 0 + group + patient, data=samples)

y <- edgeR::DGEList(txi$counts)

keep <- edgeR::filterByExpr(y, design)
y <- y[keep, ]

# normalize and run voom transformation
y <- edgeR::calcNormFactors(y)
logcpm <- edgeR::cpm(y, normalized.lib.sizes = T, log=TRUE)
scaled <- t(scale(t(logcpm)))
v <- limma::voom(y, design, plot = F)

## dds for combat seq later?
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ patient + group)
dds <- DESeq(dds)
# small prefilter 
keep <- rowSums(counts(dds) >= 20) >= 4
dds <- dds[keep,]

#combat seq
# 1. use raw integer counts
# 2. normalise and transform after correction.
mm <- model.matrix(~group, data=samples)
# takes a long time, write to file
comb = sva::ComBat_seq(assay(dds), batch=dds$patient, group = dds$group)

## Found 4 batches
## Using full model in ComBat-seq.
## Adjusting for 7 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data

dds = DESeqDataSetFromMatrix(comb, colData = samples, design =  ~ group )
dds = DESeq(dds)
vsd = vst(dds, blind=F)
comb = assay(vsd)
#write.table(comb, "/data/ORBSEN_PATHFINDR/combat_seq.txt", row.names = T, sep="\t", quote=F)
#comb = read.csv("/data/ORBSEN_PATHFINDR/combat_seq.txt", row.names = 1, header=T, sep="\t")

# limma removeBatch
# not as effective!
#vsd <- vst(dds, blind=FALSE)
#mat <- assay(vsd)
#mm <- model.matrix(~group, data=samples)
#mat <- limma::removeBatchEffect(mat, batch=vsd$patient, design=mm)

#for pca plots, subsetting things.
plot1 = read.csv("/data/ORBSEN_PATHFINDR/plot1.txt", header=T, row.names = 1, sep="\t")
plot2 = read.csv("/data/ORBSEN_PATHFINDR/plot2.txt", header=T, row.names = 1, sep="\t")

1 Cytokine Stimulated CD362+ Cells vs. Unstimulated CD362+ Cells

1.1 Differential Expression

Differentially expressed genes were detected using the limma::voom() package with the following parameters:

Model design: ~ 0 + patient + group where group is a combination of the cell type (CD362+) and condition (control/cytokine) denoting if the cells were stimulated. The effects of patient variability are controlled for in the model design.
No Log Fold Change (logFC) cut off was applied to the reported genes.
All genes reported are statistically significant with an Adjusted P-Value of <=0.05.

contrast <- limma::makeContrasts(
  CD362_pos = groupcytokine_ARIA_CD362 - groupcontrol_ARIA_CD362,
  CD362_neg = groupcytokine_ARIA - groupcontrol_ARIA,
  levels = colnames(design))

fit = limma::lmFit(v, design = design)
fit <- limma::eBayes(limma::contrasts.fit(limma::lmFit(v, design = design), contrast))

CD362_pos = limma::topTable(fit, number=Inf, p.value = 0.05, adjust.method = "BH", coef="CD362_pos")
CD362_pos = tibble::rownames_to_column(CD362_pos, "Gene")
CD362_neg = limma::topTable(fit, number=Inf, p.value = 0.05, adjust.method = "BH", coef="CD362_neg")
CD362_neg = tibble::rownames_to_column(CD362_neg, "Gene")

info = biomaRt::getBM(attributes=c("entrezgene_description", "hgnc_symbol"),
                      filters="hgnc_symbol",
                      values=CD362_pos$Gene,
                      mart=mart,
                      useCache=T)

CD362_pos = merge(CD362_pos, info, by.x="Gene", by.y="hgnc_symbol")

info = biomaRt::getBM(attributes=c("entrezgene_description", "hgnc_symbol"),
                      filters="hgnc_symbol",
                      values=CD362_neg$Gene,
                      mart=mart,
                      useCache=T)

CD362_neg = merge(CD362_neg, info, by.x="Gene", by.y="hgnc_symbol")

1.1.1 Up Regulated

CD362_pos = subset(CD362_pos, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
pos = CD362_pos[which(CD362_pos$logFC>0),]
pos = pos[order(pos$logFC, decreasing=T),]
pos$logFC = round(pos$logFC, 3)
DT::datatable(pos, rownames = FALSE, options=list(scrollX=T))

Return to Table of Contents

1.1.2 Down Regulated

CD362_pos = subset(CD362_pos, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
neg = CD362_pos[which(CD362_pos$logFC<0),]
neg = neg[order(neg$logFC, decreasing=F ),]
neg$logFC = round(neg$logFC, 3)
DT::datatable(neg, rownames = FALSE, options=list(scrollX=T))

Return to Table of Contents

1.2 Pathway Analysis Cytokine Stimulated CD362+ DEGs

Enrichment Analysis was performed using the pathfindR::run_pathfindR() package. Differentially expressed genes reported in CD362+ cytokine stimulated cells with abs(logFC>1.5) were used as input against the KEGG, Reactome and GO Biological Processes (BP), Molecular Function (MF) and Cellular Components (CC) databases.

100 iterations were run for each analysis - pathways considered statistically significant in >50 iterations are reported in the results below:

1.2.1 KEGG
1.2.2 Reactome
1.2.3 GO
Fold Enrichment > 1 = over represetation, < 1 = under representation

keggpathway1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_kegg.txt", header=T, sep="\t")
reactome1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_reactome.txt", header=T, sep="\t")
go1 = read.csv("/data/ORBSEN_PATHFINDR/CD362_pos_go-all.txt", header=T, sep="\t")

1.2.1 KEGG

keggpathway1 = keggpathway1[which(keggpathway1$occurrence>50), ]
DT::datatable(keggpathway1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

1.2.2 Reactome

reactome1 = reactome1[which(reactome1$occurrence >50), ]
DT::datatable(reactome1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

1.2.3 GO

go1 = go1[which(go1$occurrence>50), ]
DT::datatable(go1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

1.3 Highlighted Pathways

Per request, specific pathways were investigated for over representation in CD362+ cytokine stimulated cells vs. unstimulated CD362+ cells:

1.3.1 Apoptosis
1.3.2 Caspases, Cytochrome c
1.3.3 “Find Me” Apoptosis Signals
1.3.4 Transmembrane Proteins
1.3.5 Monocyte/Macrophage (Innate Immunity)

Pathways have been clustered by similarity of biological relevance to aid visualisation.

Please note that the pathways selected below may not be representative of all pathways involved in for example, Apoptosis or Caspase pathways. Those with more domain knowledge may wish to utilize the search function in the complete list of pathways returned by KEGG, Reactome and Go in Section 1.2. Contact me if you want specific pathways included in plots.

test = rbind(keggpathway1, reactome1, go1)
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

1.3.1 Apoptosis

Results from KEGG, Reactome and GO were merged and searched for terms with “apoptosis” present in them.

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(y=Term_Description,x=Fold_Enrichment)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Apoptosis Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

1.3.2 Caspases/Cytochrome c

Results from KEGG, Reactome and GO were merged and searched for terms with “caspase” and “cytochrome” present in them.

search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Caspase/Cytochrome c Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

1.3.3 ‘Find Me’ Apoptosis Signals

Results from KEGG, Reactome and GO were merged and searched for terms with “ATP”, “phosphatase”, “nucleotide”, “sphingolipid”, and “lysomal” present in them.

search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ 'Find Me' Signal Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

1.3.4 Transmembrane Proteins

Results from KEGG, Reactome and GO were merged and searched for pathways containing genes from the TMEM family.

search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Transmembrane Protein Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

1.3.5 Monocyte/Macrophage (Innate Immunity)

Results from KEGG, Reactome and GO were merged and searched for terms with “Innate”, “neutrohil”, and “effector” present in them.

search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("SARS", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Cytokine/Macrophage Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

2 Cytokine Stimulated CD362- Cells vs. Unstimulated CD362- Cells

2.1 Differential Expression

Differentially expressed genes were detected using the limma::voom() package with the following parameters:

Model design: ~ 0 + patient + group where group is a combination of the cell type (CD362-) and condition (control/cytokine) denoting if the cells were stimulated. The effects of patient variability are controlled for in the model design.
No Log Fold Change (logFC) cut off was applied to the reported results.
All genes reported are statistically significant with an Adjusted P-Value of <=0.05.

2.1.1 Up Regulated

CD362_neg = subset(CD362_neg, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
pos = CD362_neg[which(CD362_neg$logFC>0),]
pos = pos[order(pos$logFC, decreasing=T),]
pos$logFC = round(pos$logFC, 3)
DT::datatable(pos, rownames = FALSE, options=list(scrollX=T))

Return to Table of Contents

2.1.2 Down Regulated

CD362_neg = subset(CD362_neg, select=c(Gene, entrezgene_description, logFC, adj.P.Val))
neg = CD362_neg[which(CD362_neg$logFC<0),]
neg = neg[order(neg$logFC, decreasing=F ),]
neg$logFC = round(neg$logFC, 3)
DT::datatable(neg, rownames = FALSE, options=list(scrollX=T))

Return to Table of Contents

2.2 Pathway Analysis Cytokine Stimulated CD362- DEGs

2.2.1 KEGG
2.2.2 Reactome
2.2.3 GO

keggpathway2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_kegg.txt", header=T, sep="\t")
reactome2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_reactome.txt", header=T, sep="\t")
go2 = read.csv("/data/ORBSEN_PATHFINDR/CD362_neg_go-all.txt", header=T, sep="\t")

2.2.1 KEGG

DT::datatable(keggpathway2[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

2.2.2 Reactome

DT::datatable(reactome2[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

2.2.3 GO

DT::datatable(go2[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

2.3 Highlighted Pathways

Per request, specific pathways were investigated for over representation in CD362- cytokine stimulated cells vs. unstimulated CD362- cells:

2.3.1 Apoptosis
2.3.2 Caspases, Cytochrome c
2.3.3 “Find Me” Apoptosis Signals
2.3.4 Transmembrane Proteins
2.3.5 Monocyte/Macrophage (Innate Immunity)

Pathways have been clustered by similarity of biological relevance to aid visualisation.

Please note that the pathways selected below may not be representative of all pathways involved in for example, Apoptosis or Caspase pathways. Those with more domain knowledge may wish to utilize the search function in the complete list of pathways returned by KEGG, Reactome and Go in Section 2.2. Contact me if you want specific pathways included in plots.

test = rbind(keggpathway2, reactome2, go2)
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

2.3.1 Apoptosis

Results from KEGG, Reactome and GO were merged and searched for terms with “apoptosis” present in them.

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Apoptosis Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

2.3.2 Caspases/Cytochrome c

Results from KEGG, Reactome and GO were merged and searched for terms with “caspase” and “cytochrome” present in them.

search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Caspase/Cytochrome c Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

2.3.3 ‘Find Me’ Apoptosis Signals

Results from KEGG, Reactome and GO were merged and searched for terms with “ATP”, “phosphatase”, “nucleotide”, “sphingolipid”, and “lysomal” present in them.

search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ 'Find Me' Signal Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

2.3.4 Transmembrane Proteins

Results from KEGG, Reactome and GO were merged and searched for pathways containing genes from the TMEM family.

search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Transmembrane Protein Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=3))

Return to Table of Contents

2.3.5 Monocyte/Macrophage (Innate Immunity)

Results from KEGG, Reactome and GO were merged and searched for terms with “Innate”, “neutrohil”, and “effector” present in them.

search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)

save = c()
for(i in 1:nrow(clst_search1)){
  row = as.data.frame(clst_search1[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
clst_search1$size = save
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("SARS", clst_search1$Term_Description), ]
clst_search1 = clst_search1[order(clst_search1$Fold_Enrichment),]
ggplot(clst_search1, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=lowest_p,size=size)) +
  scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "CD362+ Cytokine/Macrophage Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='P-value',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) + ggplot2::facet_grid(clst_search1$Cluster ~ ., scales = "free_y", space = "free", drop = TRUE)  +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

DT::datatable(clst_search1[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T, pageLength=3))

Return to Table of Contents

3 Comparing Highlighted Pathways

The highlighted pathways in Section 1.3 and Section 2.3 were selected for direct comparison between CD362+ and CD362- cells using Gene Set Variation Analysis (GSVA). Briefly, GSVA assesses the relative enrichment of gene sets across samples using a non-parametric approach. Conceptually, GSVA transforms a gene by sample expression matrix into a geneset by sample pathway enrichment matrix. This facilitates many forms of statistical analysis in the ‘space’ of pathways rather than genes, providing a higher level of interpretability.

The input gene expression matrix to GSVA was batch corrected using sva::Combat-Seq to regress out the effect of patient variability prior to normalisation and transformation using DESeq2::vst().

The results of GSVA are clustered and visualised using heatmaps to identify patterns of over/under enrichment in CD362+ or CD362- cells. A formal t.test is conducted between CD362+ and CD362- groups and reported in the tables below.

N.B: Prior to running GSVA, msigdbr() is used to download and format gene sets from KEGG, Reactome and GO. Unfortunately when extracting pathways from msigdbr() that are deemed significant by pathfindR using the pathway ID (e.g hsa04210), not every pathway is returned resulting in a small loss of information.

3.1 Apoptosis
3.2 Caspases, Cytochrome c
3.3 “Find Me” Apoptosis Signals
3.4 Transmembrane Proteins
3.5 Monocyte/Macrophage (Innate Immunity)

3.1 Apoptosis

The heatmap displaying GSVA enrichment scores shows decent sample clustering of CD362+ and CD362- cells indicating that there is some underlying structure in the levels of enrichment driving separation between the two cytokine stimulated cell types:

test = rbind(keggpathway1, reactome1, go1)
test2 = rbind(keggpathway2, reactome2, go2)
# ORBCEL undergo extrinsic  (FasL) and intrinsic (Bcl2) mediated apoptosis/anoikis within 2/3 hrs of infusion
search1 = dplyr::filter(test, grepl('apop|Apop', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!clst_search1$ID=="R-HSA-109581",]

search2 = dplyr::filter(test2, grepl('apop|Apop', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]
clst_search2 = clst_search2[!clst_search2$ID=="R-HSA-109581",]

sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
  my_sets$gene_symbol, # The genes we want split into pathways
  my_sets$gs_name # The pathways made as the higher levels of the list
)

cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)

rownames(var) = sub("^[^_]*_", "", rownames(var))

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

var = t(scale(t(var), center = T))

pheatmap(var,
         cluster_rows = T, 
         cluster_cols = T,
         scale = "none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

The t.test comparing the mean enrichment scores show that Apoptosis, TNFR1 Induced Proapoptotic Signalling, Apoptotic Execution Phase, Intrinsic Pathway For Apoptosis, Caspase Activation Via Extrinsic Apoptotic Signalling Pathway and Extrinsic Apoptotic Signalling Pathway have higher enrichment scores in CD362+ cells than CD362- cells, however none of the differences in means are statistically significant:

pathway = c()
pval = c()
pos = c()
neg = c()

for(i in 1:nrow(var)){
  name = rownames(var)[i]
  pathway = c(pathway, name)
  row = as.data.frame(var[i,])
  t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
  pval = c(pval, t$p.value)
  pos = c(pos, as.numeric(t$estimate[1]))
  neg =  c(neg, as.numeric(t$estimate[2]))
}

adj = p.adjust(pval, method = "BH")

result = data.frame(row.names = pathway,
                    mean_CD362_positive = pos,
                    mean_CD362_negative = neg,
                    p.value = pval,
                    adj.p.val = adj)

result = result[order(result$mean_CD362_positive, decreasing = T),]
DT::datatable(result, rownames = T, options=list(scrollX=T))

The bubble plot overlaying CD362+ and CD362- Fold Enrichment for Apoptosis pathways is difficult to assign meaning to - the larger fold enrichment changes are indicative of ‘higher’ group membership in a gene set rather than higher global expression for that pathway. Nonetheless, as the inputs for enrichment analysis are statistically significant genes (logFC > 1.5 & Adj.P.Value <= 0.05) we can conclude that Cytokine stimulated CD362+ cells have more significant differentially expressed genes involved in Apoptosis associated pathways than Cytokine stimulated CD362- cells:

plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]

save = c()
for(i in 1:nrow(plota)){
  row = as.data.frame(plota[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plota$size = save

save = c()
for(i in 1:nrow(plotb)){
  row = as.data.frame(plotb[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plotb$size = save

plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"

plot = rbind(plota, plotb)

ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
  scale_color_manual(values = c("CD362+" = "forestgreen",
                                "CD362-"="purple")) +
  #scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "Apoptosis Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='Cell Type',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

View CD362+ Apoptosis Associated Gene Sets
View CD362- Apoptosis Associated Gene Sets
Return to Table of Contents

Gene Level Heatmaps

Gene expression heatmaps on a per pathway basis between CD362+ and CD362- samples are provided to illustrate the similarity of the two cell types.

KEGG_APOPTOSIS

genes = my_sets$KEGG_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_APOPTOTIC_EXECUTION_PHASE

genes = my_sets$REACTOME_APOPTOTIC_EXECUTION_PHASE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_ACTIVATION_OF_CYSTEINE_TYPE_ENDOPEPTIDASE_ACTIVITY_INVOLVED_IN_APOPTOTIC_PROCESS

genes = my_sets$GOBP_ACTIVATION_OF_CYSTEINE_TYPE_ENDOPEPTIDASE_ACTIVITY_INVOLVED_IN_APOPTOTIC_PROCESS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY

genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_DEFECTIVE_INTRINSIC_PATHWAY_FOR_APOPTOSIS

genes = my_sets$REACTOME_DEFECTIVE_INTRINSIC_PATHWAY_FOR_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_EXTRINSIC_APOPTOTIC_SIGNALING_PATHWAY

genes = my_sets$GOBP_EXTRINSIC_APOPTOTIC_SIGNALING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_INTRINSIC_APOPTOTIC_SIGNALING_PATHWAY_IN_RESPONSE_TO_DNA_DAMAGE

genes = my_sets$GOBP_INTRINSIC_APOPTOTIC_SIGNALING_PATHWAY_IN_RESPONSE_TO_DNA_DAMAGE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS

genes = my_sets$REACTOME_INTRINSIC_PATHWAY_FOR_APOPTOSIS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_TNFR1_INDUCED_PROAPOPTOTIC_SIGNALING

genes = my_sets$REACTOME_TNFR1_INDUCED_PROAPOPTOTIC_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

Predictive Features

Elastic net logistic regression was performed on genes involved in the apoptotic pathways listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:

genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]

mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))

model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.3, lambda = NULL)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
sig_df = sig_df[2:nrow(sig_df),] 

mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

mat = t(scale(t(mat), center = T, scale = T))

pheatmap(mat,
         cluster_rows = T, 
         cluster_cols = T,
         scale = "none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

3.2 Caspases/Cytochrome c

The heatmap displaying GSVA enrichment scores for Caspase/Cytochrome C pathways shows good sample separation between stimulated CD362+ vs. stimulated CD362- cells, however the results are contradictory: (1) Up-regulation of Negative regulation of release of cytochrome c from mitochondria suggests that cytochrome c release is reduced in stimulated CD362+ cells however, (2) Up-regulation of caspase activation via death receptors and extrinsic apoptotic signalling show that stimulated CD362+ cells suggests apoptotic activity is upregulated.
It is worth while investigating the differentially expressed genes listed in CD362+ Caspase/Cytochrome c Associated Gene Sets.
Of note, the pathway Negative regulation of release of cytochrome c from mitochondria contains only 17 genes in its set (of which three are differentially expressed). The small size of this gene set is the likely cause of it being labelled as a significantly enriched pathway. Take these results with a pinch of salt.

search1 = dplyr::filter(test, grepl('casp|Casp|cytochr', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

search2 = dplyr::filter(test2, grepl('casp|Casp|cytochr', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]

sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
  my_sets$gene_symbol, # The genes we want split into pathways
  my_sets$gs_name # The pathways made as the higher levels of the list
)

cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Poisson",  min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)

rownames(var) = sub("^[^_]*_", "", rownames(var))

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

var = t(scale(t(var), center = T))

pheatmap(var,
         cluster_rows = T, 
         cluster_cols = T, 
         scale="none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

All three pathways are more highly enriched in stimulated CD362+ cells vs. stimulated CD362- cells, however none are statistically significant.

pathway = c()
pval = c()
pos = c()
neg = c()

for(i in 1:nrow(var)){
  name = rownames(var)[i]
  pathway = c(pathway, name)
  row = as.data.frame(var[i,])
  t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
  pval = c(pval, t$p.value)
  pos = c(pos, as.numeric(t$estimate[1]))
  neg =  c(neg, as.numeric(t$estimate[2]))
}

adj = p.adjust(pval, method = "BH")

result = data.frame(row.names = pathway,
                    mean_CD362_positive = pos,
                    mean_CD362_negative = neg,
                    p.value = pval,
                    adj.p.val = adj)

DT::datatable(result, rownames = T, options=list(scrollX=T))

The fold enrichment scores for Negative regulation of release of cytochrome c from mitochondria are nearly identical for stimulated CD362+ and stimulated CD362- cells. Referring to my previous point about the background gene set size, this pathway is somewhat of a red herring.
Encouragingly, stimulated CD362+ cells exhibit higher fold enrichment changes in pathways pertaining to capsase activation compared to stimulated CD362- cells.

plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]

save = c()
for(i in 1:nrow(plota)){
  row = as.data.frame(plota[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plota$size = save

save = c()
for(i in 1:nrow(plotb)){
  row = as.data.frame(plotb[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plotb$size = save

plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"

plot = rbind(plota, plotb)

ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
  scale_color_manual(values = c("CD362+" = "forestgreen",
                                "CD362-"="purple")) +
  #scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "Caspase/Cytochrome C Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='Cell Type',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

View CD362+ Caspase/Cytochrome c Associated Gene Sets
View CD362- Caspase/Cytochrome c Associated Gene Sets
Return to Table of Contents

Gene Level Heatmaps

Of note, the Negative regulation of release of cytochrome c from mitochondria pathway shows no structure when plotting the normalised gene expression of all genes in the pathway. This corroborates with my previous points and we can build the case that this pathway is noise.
Stimulated CD362+ cells cluster well in the heatmap when plotting the normalised gene expression for genes involved in capase activation. Using logistic regression in the section below we can pull out the predictive features that are driving this clustering pattern.

GOBP_NEGATIVE_REGULATION_OF_RELEASE_OF_CYTOCHROME_C_FROM_MITOCHONDRIA

genes = my_sets$GOBP_NEGATIVE_REGULATION_OF_RELEASE_OF_CYTOCHROME_C_FROM_MITOCHONDRIA
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_CASPASE_ACTIVATION_VIA_DEATH_RECEPTORS_IN_THE_PRESENCE_OF_LIGAND

genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_DEATH_RECEPTORS_IN_THE_PRESENCE_OF_LIGAND
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY

genes = my_sets$REACTOME_CASPASE_ACTIVATION_VIA_EXTRINSIC_APOPTOTIC_SIGNALLING_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

Predictive Features

Elastic net logistic regression was performed on genes involved in the Caspase, Cytochrome C pathways listed above was used to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:

genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]

mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
sig_df = sig_df[2:nrow(sig_df),] 

mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

mat = t(scale(t(mat), center = T, scale = T))

pheatmap(mat,
         cluster_rows = T, 
         cluster_cols = T,
         scale="column",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

3.3 ‘Find Me’ Apoptosis Signals

Uncertain if the pathways included in my search terms are relevant. Please use the search bar in the results for KEGG, Reactome and GO pathways. If pathways of interest are found, contact me and I can repeat the analysis below with updated pathways.

search1 = dplyr::filter(test, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]
clst_search1 = clst_search1[!grepl("repair|Repair|DNA", clst_search1$Term_Description), ]

search2 = dplyr::filter(test2, grepl('ATP|phospha|Phospha|nucleot|Nucleot|Sphing|sphing|Lyso|lyso', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]
clst_search2 = clst_search2[!grepl("repair|Repair|DNA", clst_search2$Term_Description), ]

sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
  my_sets$gene_symbol, # The genes we want split into pathways
  my_sets$gs_name # The pathways made as the higher levels of the list
)

cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)

rownames(var) = sub("^[^_]*_", "", rownames(var))

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

var = scale(t(var), center = T)
var = t(var)

pheatmap(var,
         cluster_rows = T, 
         cluster_cols = T,
         scale="none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

pathway = c()
pval = c()
pos = c()
neg = c()

for(i in 1:nrow(var)){
  name = rownames(var)[i]
  pathway = c(pathway, name)
  row = as.data.frame(var[i,])
  t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
  pval = c(pval, t$p.value)
  pos = c(pos, as.numeric(t$estimate[1]))
  neg =  c(neg, as.numeric(t$estimate[2]))
}

adj = p.adjust(pval, method = "BH")

result = data.frame(row.names = pathway,
                    mean_CD362_positive = pos,
                    mean_CD362_negative = neg,
                    p.value = pval,
                    adj.p.val = adj)

DT::datatable(result, rownames = T, options=list(scrollX=T))

plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]

save = c()
for(i in 1:nrow(plota)){
  row = as.data.frame(plota[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plota$size = save

save = c()
for(i in 1:nrow(plotb)){
  row = as.data.frame(plotb[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plotb$size = save

plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"

plot = rbind(plota, plotb)

ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
  scale_color_manual(values = c("CD362+" = "forestgreen",
                                "CD362-"="purple")) +
  #scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "'Find Me' Apoptosis Signals",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='Cell Type',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

View CD362+ ‘Find Me’ Apoptosis Signal Associated Gene Sets
View CD362- ‘Find Me’ Apoptosis Signal Associated Gene Sets
Return to Table of Contents

Gene Level Heatmaps

GOBP_POSITIVE_REGULATION_OF_PHOSPHATIDYLINOSITOL_3_KINASE_SIGNALING

genes = my_sets$GOBP_POSITIVE_REGULATION_OF_PHOSPHATIDYLINOSITOL_3_KINASE_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOCC_AUTOLYSOSOME

genes = my_sets$GOCC_AUTOLYSOSOME
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOMF_PHOSPHATIDYLINOSITOL_3_4_5_TRISPHOSPHATE_BINDING

genes = my_sets$GOMF_PHOSPHATIDYLINOSITOL_3_4_5_TRISPHOSPHATE_BINDING
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOMF_PHOSPHATIDYLINOSITOL_3_KINASE_BINDING

genes = my_sets$GOMF_PHOSPHATIDYLINOSITOL_3_KINASE_BINDING
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOMF_PROTEIN_TYROSINE_PHOSPHATASE_ACTIVITY

genes = my_sets$GOMF_PROTEIN_TYROSINE_PHOSPHATASE_ACTIVITY
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GLOBO_SERIES

genes = my_sets$KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GLOBO_SERIES
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_METABOLISM_OF_NUCLEOTIDES

genes = my_sets$REACTOME_METABOLISM_OF_NUCLEOTIDES
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_NUCLEOTIDE_BINDING_DOMAIN_LEUCINE_RICH_REPEAT_CONTAINING_RECEPTOR_NLR_SIGNALING_PATHWAYS

genes = my_sets$REACTOME_NUCLEOTIDE_BINDING_DOMAIN_LEUCINE_RICH_REPEAT_CONTAINING_RECEPTOR_NLR_SIGNALING_PATHWAYS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_NUCLEOTIDE_BIOSYNTHESIS

genes = my_sets$REACTOME_NUCLEOTIDE_BIOSYNTHESIS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_NUCLEOTIDE_SALVAGE

genes = my_sets$REACTOME_NUCLEOTIDE_SALVAGE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_MULTIPLE_NUCLEOTIDE_PATCH_REPLACEMENT_PATHWAY

genes = my_sets$REACTOME_RESOLUTION_OF_AP_SITES_VIA_THE_MULTIPLE_NUCLEOTIDE_PATCH_REPLACEMENT_PATHWAY
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

Predictive Features

No predictive features were found using elastic net logistic regression, i.e no coefficients retunred by model..

3.4 Transmembrane Proteins

search1 = dplyr::filter(test, grepl('TMEM', Up_regulated))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

search2 = dplyr::filter(test2, grepl('TMEM', Up_regulated))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]

sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
  my_sets$gene_symbol, # The genes we want split into pathways
  my_sets$gs_name # The pathways made as the higher levels of the list
)

cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)

rownames(var) = sub("^[^_]*_", "", rownames(var))

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

var = t(scale(t(var), scale=T, center = T))

pheatmap(var,
         cluster_rows = T, 
         cluster_cols = T,
         scale="none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

pathway = c()
pval = c()
pos = c()
neg = c()

for(i in 1:nrow(var)){
  name = rownames(var)[i]
  pathway = c(pathway, name)
  row = as.data.frame(var[i,])
  t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
  pval = c(pval, t$p.value)
  pos = c(pos, as.numeric(t$estimate[1]))
  neg =  c(neg, as.numeric(t$estimate[2]))
}

adj = p.adjust(pval, method = "BH")

result = data.frame(row.names = pathway,
                    mean_CD362_positive = pos,
                    mean_CD362_negative = neg,
                    p.value = pval,
                    adj.p.val = adj)

DT::datatable(result, rownames = T, options=list(scrollX=T))

plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]

save = c()
for(i in 1:nrow(plota)){
  row = as.data.frame(plota[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plota$size = save

save = c()
for(i in 1:nrow(plotb)){
  row = as.data.frame(plotb[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plotb$size = save

plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"

plot = rbind(plota, plotb)

ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
  scale_color_manual(values = c("CD362+" = "forestgreen",
                                "CD362-"="purple")) +
  #scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "Transmembrane Associated Pathways",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='Cell Type',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

View CD362+ Transmembrane Associated Gene Sets
View CD362- Transmembrane Associated Gene Sets
Return to Table of Contents

Gene Level Heatmaps

GOBP_POSITIVE_REGULATION_OF_I_KAPPAB_KINASE_NF_KAPPAB_SIGNALING

genes = my_sets$GOBP_POSITIVE_REGULATION_OF_I_KAPPAB_KINASE_NF_KAPPAB_SIGNALING
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_1_BETA_PRODUCTION

genes = my_sets$GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_1_BETA_PRODUCTION
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_6_PRODUCTION

genes = my_sets$GOBP_POSITIVE_REGULATION_OF_INTERLEUKIN_6_PRODUCTION
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOCC_CENTRIOLAR_SATELLITE

genes = my_sets$GOCC_CENTRIOLAR_SATELLITE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOCC_ENDOPLASMIC_RETICULUM_LUMEN

genes = my_sets$GOCC_ENDOPLASMIC_RETICULUM_LUMEN
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOCC_SPECIFIC_GRANULE_MEMBRANE

genes = my_sets$GOCC_SPECIFIC_GRANULE_MEMBRANE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOCC_TRANS_GOLGI_NETWORK_MEMBRANE

genes = my_sets$GOCC_TRANS_GOLGI_NETWORK_MEMBRANE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_REGULATION_OF_INSULIN_LIKE_GROWTH_FACTOR_IGF_TRANSPORT_AND_UPTAKE_BY_INSULIN_LIKE_GROWTH_FACTOR_BINDING_PROTEINS_IGFBPS

genes = my_sets$REACTOME_REGULATION_OF_INSULIN_LIKE_GROWTH_FACTOR_IGF_TRANSPORT_AND_UPTAKE_BY_INSULIN_LIKE_GROWTH_FACTOR_BINDING_PROTEINS_IGFBPS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

Predictive Features

Elastic net logistic regression was performed on pathways with “TMEM” genes listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:

genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]

mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 0.1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
sig_df = sig_df[2:nrow(sig_df),] 

mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

mat = t(scale(t(mat), center = T, scale = T))

pheatmap(mat,
         cluster_rows = T, 
         cluster_cols = T,
         scale="column",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

3.5 Monocyte/Macrophage (Innate Pathway)

Clustering of the GSVA enrichment scores for each sample reveals no underlying structure delineating stimulated CD362+ vs. stimulated CD362- cells.

search1 = dplyr::filter(test, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search1 = cluster_enriched_terms(search1, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search1 = clst_search1[which(clst_search1$occurrence>50), ]

search2 = dplyr::filter(test2, grepl('innate|Innate|Neutro|Effector', Term_Description))
clst_search2 = cluster_enriched_terms(search2, plot_dend = F, plot_clusters_graph = F, use_description = T, use_active_snw_genes = T)
clst_search2 = clst_search2[which(clst_search2$occurrence>50), ]

sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
master = rbind(sets, sets1, sets2, sets3, sets4)
# use all sets returned by both pathway analysis
my_sets = master[which(master$gs_exact_source %in% c(clst_search1$ID, clst_search2$ID)),]
ids = unique(my_sets$gs_exact_source)
my_sets = split(
  my_sets$gene_symbol, # The genes we want split into pathways
  my_sets$gs_name # The pathways made as the higher levels of the list
)

cases = rownames(plot1)[seq(2,nrow(plot1),2)]
control = rownames(plot2)[seq(2,nrow(plot2),2)]
var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = 500, mx.diff = TRUE, verbose = FALSE)

rownames(var) = sub("^[^_]*_", "", rownames(var))

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

var = t(scale(t(var), center = T, scale = T))

pheatmap(var,
         cluster_rows = T, 
         cluster_cols = T,
         scale="none",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

Whilst not significant, the mean enrichment scores reveal negative regulation of innate immune respose to be higher in stimulated CD362+ cells, in conjunction with activation of innate immune response. It is worthwhile viewing the differentially expressed genes involved in these pathways to draw conclusions from this - View CD362+ Monocyte/Macrophage (Innate Pathway) Associated Gene Sets.

pathway = c()
pval = c()
pos = c()
neg = c()

for(i in 1:nrow(var)){
  name = rownames(var)[i]
  pathway = c(pathway, name)
  row = as.data.frame(var[i,])
  t = t.test(row[1:4,], row[5:8,], alternative = "two.sided")
  pval = c(pval, t$p.value)
  pos = c(pos, as.numeric(t$estimate[1]))
  neg =  c(neg, as.numeric(t$estimate[2]))
}

adj = p.adjust(pval, method = "BH")

result = data.frame(row.names = pathway,
                    mean_CD362_positive = pos,
                    mean_CD362_negative = neg,
                    p.value = pval,
                    adj.p.val = adj)

result = result[order(result$mean_CD362_positive, decreasing = T),]
DT::datatable(result, rownames = T, options=list(scrollX=T))

plota = clst_search1[which(clst_search1$ID %in% ids),]
plotb = clst_search2[which(clst_search2$ID %in% ids),]

save = c()
for(i in 1:nrow(plota)){
  row = as.data.frame(plota[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plota$size = save

save = c()
for(i in 1:nrow(plotb)){
  row = as.data.frame(plotb[i,])
  size = (length(strsplit(row$Up_regulated, ", ")[[1]]) + length(strsplit(row$Down_regulated, ", ")[[1]]))
  save = c(save, size)
}
plotb$size = save

plota = subset(plota, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plota$group = "CD362+"
plotb = subset(plotb, select=c(Term_Description, Fold_Enrichment, lowest_p, Cluster, size))
plotb$group = "CD362-"

plot = rbind(plota, plotb)

ggplot(plot, aes(x=Fold_Enrichment,y=Term_Description)) +
  geom_point(aes(color=group,size=size), position=position_jitter(h=0.0,w=0.0), alpha = 0.6) +
  scale_color_manual(values = c("CD362+" = "forestgreen",
                                "CD362-"="purple")) +
  #scale_color_gradientn(colours = rocket(100, begin = 0, end = 0.5, alpha = 0.8, direction = -1)) +
  labs( title = "Monocyte/Macrophage (Innate Pathway)",
        subtitle = " KEGG, Reactome, GO-BP, GO-MF, GO-CC",
    x='Fold Enrichment', y=NULL,
    color='Cell Type',size='Gene\nnumber'
  ) + theme_linedraw() + scale_size(range = c(3, 10)) + scale_size(range = c(3, 10)) +
    theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size=12),
    axis.text.y.left = element_text(size = 12),
    axis.title.x.bottom = element_text(size=10)
  )

View CD362+ Monocyte/Macrophage (Innate Pathway) Associated Gene Sets
View CD362- Monocyte/Macrophage (Innate Pathway) Associated Gene Sets
Return to Table of Contents

Gene Level Heatmaps

GOBP_ACTIVATION_OF_INNATE_IMMUNE_RESPONSE

genes = my_sets$GOBP_ACTIVATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_ANTIVIRAL_INNATE_IMMUNE_RESPONSE

genes = my_sets$GOBP_ANTIVIRAL_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_NEGATIVE_REGULATION_OF_INNATE_IMMUNE_RESPONSE

genes = my_sets$GOBP_NEGATIVE_REGULATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

GOBP_REGULATION_OF_INNATE_IMMUNE_RESPONSE

genes = my_sets$GOBP_REGULATION_OF_INNATE_IMMUNE_RESPONSE
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_REGULATION_OF_INNATE_IMMUNE_RESPONSES_TO_CYTOSOLIC_DNA

genes = my_sets$REACTOME_REGULATION_OF_INNATE_IMMUNE_RESPONSES_TO_CYTOSOLIC_DNA
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(mat[,c(control,cases)], cluster_rows = T, cluster_cols = T, scale="none", annotation_col = ann_col, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

REACTOME_RHO_GTPASE_EFFECTORS

genes = my_sets$REACTOME_RHO_GTPASE_EFFECTORS
mat = comb[which(rownames(comb)%in% genes),]

mat = t(scale(t(mat), center = T, scale=T))

pheatmap(t(mat[,c(control,cases)]), cluster_rows = T, cluster_cols = T, scale="none", annotation_row = ann_col, angle_col = 45, annotation_colors = ann_clr, color = hcl.colors(100, "RdBu",rev=T))

Predictive Features

Elastic net logistic regression was performed on Monocyte/Macrophage pathway genes listed above to arrive at a set of genes capable of delineating Cytokine stimulated CD362+ cells vs. Cytokine stimulated CD362- cells:

genes = unique(unlist(my_sets))
mat = comb[which(rownames(comb) %in% genes), c(cases,control)]

mat = as.data.frame(scale(t(mat), center = T, scale=T))
mat$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
mat = mat %>% select_if(~ ! any(is.na(.)))
model = glmnet::cv.glmnet(as.matrix(mat[,c(1:ncol(mat)-1)]), mat$Group, family = "binomial", alpha = 1)
sig_feat = coef(model, s="lambda.min")
sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
sig_df = sig_df[2:nrow(sig_df),] 

mat = as.data.frame(t(mat))
mat = mat[which(rownames(mat) %in% sig_df$name),]
names = rownames(mat)
mat = as.data.frame(sapply(mat, as.numeric))
rownames(mat) = names

ann_col = data.frame(row.names = colnames(var),
                     Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))

col <- c("forestgreen", "purple")
names(col) <- c("CD362+  ", "CD362-  ")
ann_clr <- list(Cell = col)

mat = t(scale(t(mat), center = T, scale = T))

pheatmap(mat,
         cluster_rows = T, 
         cluster_cols = T,
         scale="column",
         annotation_col=ann_col,
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))

4 Private Gene Signatures

Gene signatures private to CD362+ and CD362- cells were identified and used as input for Enrichment Analysis using pathfindR.

results <- decideTests(fit, adjust.method = "BH", p.value = 0.05, lfc=0)
a <- vennCounts(results)

vennDiagram(results, 
            names=c("CD362+", "CD362-"),
    include=c("both"),
    counts.col=c("red", "blue"),
    circle.col = c("red", "blue", "green3"))

4.1 Cytokine Stimulated CD362+ Private Genes

alls <- affycoretools:::makeIndices(results[,1:2])
alls.genes <- lapply(alls, function(x) row.names(results[,1:2])[x])

pos_private = CD362_pos[which(CD362_pos$Gene %in% alls.genes[[1]]),]

Up Regulated

up = pos_private[which(pos_private$logFC>0),]
DT::datatable(up, rownames = F, options = list(scrollX=T))

Down Regulated

down = pos_private[which(pos_private$logFC<0),]
DT::datatable(down, rownames = F, options = list(scrollX=T))

Return to Table of Contents

4.2 Pathway Analysis Cytokine Stimulated CD362+ Private DEGs

posprikegg = read.csv("/data/ORBSEN_PATHFINDR/posprikegg.txt", header=T, sep="\t")
posprireactome = read.csv("/data/ORBSEN_PATHFINDR/posprireactome.txt", header=T, sep="\t")
posprigo = read.csv("/data/ORBSEN_PATHFINDR/posprigo.txt", header=T, sep="\t")

4.2.1 KEGG

DT::datatable(posprikegg[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

4.2.2 Reactome

DT::datatable(posprireactome[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

4.2.3 GO

DT::datatable(posprigo[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

# posprikegg = read.csv("/data/ORBSEN_PATHFINDR/posprikegg.txt", header=T, sep="\t")
# posprireactome = read.csv("/data/ORBSEN_PATHFINDR/posprireactome.txt", header=T, sep="\t")
# posprigo = read.csv("/data/ORBSEN_PATHFINDR/posprigo.txt", header=T, sep="\t")
# 
# test = rbind(posprikegg, posprireactome, posprigo)
# 
# sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
# sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
# sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
# sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
# sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
# master = rbind(sets, sets1, sets2, sets3, sets4)
# # use all sets returned by private CD362 genes
# my_sets = master[which(master$gs_exact_source %in% test$ID),]
# ids = unique(my_sets$gs_exact_source)
# my_sets = split(
#   my_sets$gene_symbol, # The genes we want split into pathways
#   my_sets$gs_name # The pathways made as the higher levels of the list
# )
# 
# cases = rownames(plot1)[seq(2,nrow(plot1),2)]
# control = rownames(plot2)[seq(2,nrow(plot2),2)]
# var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = Inf, mx.diff = TRUE, verbose = F)
# rownames(var) = sub("^[^_]*_", "", rownames(var))
# 
# 
# # run GLMNET on it
# var = as.data.frame(t(var))
# var$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
# model = glmnet::cv.glmnet(as.matrix(var[,c(1:ncol(var)-1)]), var$Group, family = "binomial", alpha = 0.3)
# sig_feat = coef(model, s="lambda.min")
# sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
# sig_df = sig_df[2:nrow(sig_df),]                  
# 
# var = as.data.frame(t(var))
# var = var[which(rownames(var) %in% sig_df$name),]
# names = rownames(var)
# var = as.data.frame(sapply(var, as.numeric))
# rownames(var) = names
# 
# ann_col = data.frame(row.names = colnames(var),
#                      Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))
# 
# col <- c("forestgreen", "purple")
# names(col) <- c("CD362+  ", "CD362-  ")
# ann_clr <- list(Cell = col)
# 
# var = t(scale(t(var), center = T, scale = T))
# 
# pheatmap(var,
#          cluster_rows = T, 
#          cluster_cols = T,
#          scale="none",
#          annotation_col=ann_col,
#          annotation_colors = ann_clr,
#          color = hcl.colors(100, "RdBu",rev=T))

Return to Table of Contents

4.3 Cytokine Stimulated CD362- Private Genes

neg_private = CD362_neg[which(CD362_neg$Gene %in% alls.genes[[2]]),]

Up Regulated

up = neg_private[which(neg_private$logFC>0),]
DT::datatable(up, rownames = F, options = list(scrollX=T))

Down Regulated

down = neg_private[which(neg_private$logFC<0),]
DT::datatable(down, rownames = F, options = list(scrollX=T))

Return to Table of Contents

4.4 Pathway Analysis Cytokine Stimulated CD362- Private DEGs

negprikegg = read.csv("/data/ORBSEN_PATHFINDR/negprikegg.txt", header=T, sep="\t")
negprireactome = read.csv("/data/ORBSEN_PATHFINDR/negprireactome.txt", header=T, sep="\t")
negprigo = read.csv("/data/ORBSEN_PATHFINDR/negprigo.txt", header=T, sep="\t")

4.4.1 KEGG

DT::datatable(negprikegg[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

4.4.2 Reactome

DT::datatable(negprireactome[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

4.4.3 GO

DT::datatable(negprigo[,c(2,3,6,9,10)], rownames = F, options = list(scrollX=T,pageLength=4))

Return to Table of Contents

# negprikegg = read.csv("/data/ORBSEN_PATHFINDR/negprikegg.txt", header=T, sep="\t")
# negprireactome = read.csv("/data/ORBSEN_PATHFINDR/negprireactome.txt", header=T, sep="\t")
# negprigo = read.csv("/data/ORBSEN_PATHFINDR/negprigo.txt", header=T, sep="\t")
# 
# test = rbind(negprikegg, negprireactome, negprigo)
# 
# sets <- msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:KEGG")
# sets1 <-  msigdbr::msigdbr(species="Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
# sets2 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:BP")
# sets3 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:MF")
# sets4 <- msigdbr::msigdbr(species="Homo sapiens", category = "C5", subcategory = "GO:CC")
# master = rbind(sets, sets1, sets2, sets3, sets4)
# # use all sets returned by private CD362 genes
# my_sets = master[which(master$gs_exact_source %in% test$ID),]
# my_sets = split(
#   my_sets$gene_symbol, # The genes we want split into pathways
#   my_sets$gs_name # The pathways made as the higher levels of the list
# )
# 
# cases = rownames(plot1)[seq(2,nrow(plot1),2)]
# control = rownames(plot2)[seq(2,nrow(plot2),2)]
# var = GSVA::gsva(comb[,c(cases,control)], my_sets, method = "gsva", kcdf = "Gaussian",  min.sz = 5, max.sz = Inf, mx.diff = TRUE, verbose = FALSE)
# rownames(var) = sub("^[^_]*_", "", rownames(var))
# 
# 
# # run GLMNET on it
# var = as.data.frame(t(var))
# var$Group = factor(c(rep("CD362+",4),rep("CD362-",4)))
# model = glmnet::cv.glmnet(as.matrix(var[,c(1:ncol(var)-1)]), var$Group, family = "binomial", alpha = 0.5)
# sig_feat = coef(model, s="lambda.min")
# sig_df = data.frame(name = sig_feat@Dimnames[[1]][sig_feat@i + 1], coefficient = sig_feat@x)                          
# sig_df = sig_df[2:nrow(sig_df),]                  
# 
# var = as.data.frame(t(var))
# var = var[which(rownames(var) %in% sig_df$name),]
# names = rownames(var)
# var = as.data.frame(sapply(var, as.numeric))
# rownames(var) = names
# 
# ann_col = data.frame(row.names = colnames(var),
#                      Cell = c(rep("CD362+  ",4),rep("CD362-  ",4)))
# 
# col <- c("forestgreen", "purple")
# names(col) <- c("CD362+  ", "CD362-  ")
# ann_clr <- list(Cell = col)
# 
# pheatmap(var,
#          cluster_rows = T, 
#          cluster_cols = T, 
#          annotation_col=ann_col,
#          annotation_colors = ann_clr,
#          color = hcl.colors(100, "RdBu",rev=T))

Return to Table of Contents