library(biomaRt)
library(immunedeconv)
## Loading required package: EPIC
library(corrplot)
## corrplot 0.92 loaded
library(forcats)
library(stringr)
library(here)
## here() starts at /home/mchopra/Documents/PhD-Year1/deconvolution
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:biomaRt':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(RColorBrewer)
library(CePa)
##
## Attaching package: 'CePa'
## The following object is masked from 'package:tidyr':
##
## spread
library(ensembldb) ##also downloaded package EnsDb.Hsapiens.v86
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:dplyr':
##
## filter
## The following object is masked from 'package:stats':
##
## filter
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
setwd("/home/mchopra/Documents/PhD-Year1/deconvolution")
annotate_genes <- function(df){
df$hgnc_symbol <- df$gene
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl") #, host="uswest.ensembl.org")
info <- getBM(attributes=c("hgnc_symbol",
"ensembl_gene_id_version",
"chromosome_name",
"start_position",
"end_position",
"strand",
"entrezgene_description"),
filters = c("hgnc_symbol"),
values = df$gene,
mart = mart,
useCache=FALSE)
tmp <- merge(df, info, by="hgnc_symbol")
tmp$strand <- gsub("-1", "-", tmp$strand)
tmp$strand <- gsub("1", "+", tmp$strand)
tmp <- tmp[!grepl("CHR", tmp$chromosome_name),]
return(tmp)
}
convert_ensg_version_to_hgnc_df_rownames <- function(df, ensdb = EnsDb.Hsapiens.v86){
library(AnnotationDbi)
library(ensembldb)
library(EnsDb.Hsapiens.v86)
genes_base <- str_split_fixed(string = rownames(df), pattern = "\\.", n = 2)[,1]
newnames_original <- suppressWarnings(mapIds(EnsDb.Hsapiens.v86,
keys = genes_base,
column = 'SYMBOL',
keytype = 'GENEID'))
# keep ensg version of newnames is na or is duplicated
newnames <- ifelse(is.na(newnames_original) | duplicated(newnames_original),
rownames(df), newnames_original)
rownames(df) <- newnames
return(df)
}
gct_data <- read.table("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/GTEx_aortic_tissues/gene_tpm_artery_aorta.gct", header = TRUE, sep = "\t", skip = 2, stringsAsFactors = FALSE)
#gct_data
colnames(gct_data) <- gsub("\\.", "-", colnames(gct_data))
colnames(gct_data) <- gsub("GTE", "GTEX", colnames(gct_data))
cols <- colnames(gct_data)[3:ncol(gct_data)]
test <- str_split_fixed(cols, "-", n = 5)[,1:2]
test <- paste("GTEX", str_split_fixed(cols, "-", n = 5)[,2], sep = "-")
#test
colnames(gct_data)[3:ncol(gct_data)] <- test
gct_data[1:5, 1:6]
## id Name GTEX- GTEX-111YS GTEX-1122O GTEX-1128S
## 1 0 ENSG00000223972.5 DDX11L1 0.000 0.000 0.0000
## 2 1 ENSG00000227232.5 WASH7P 3.652 3.402 7.7330
## 3 2 ENSG00000278267.1 MIR6859-1 0.000 0.000 0.0000
## 4 3 ENSG00000243485.5 MIR1302-2HG 0.000 0.000 0.0659
## 5 4 ENSG00000237613.2 FAM138A 0.051 0.000 0.0000
colnames(gct_data)[3] <- "gene_name"
colnames(gct_data)[2] <- "gene_id"
gct_data <- gct_data[,-1] ##to delete column 1
#gct_data
gct_hgnc <- convert_ensg_version_to_hgnc_df_rownames(gct_data)
#gct_hgnc
gct_hgnc_not_duplicated <- gct_hgnc[!duplicated(gct_hgnc$gene_name),]
gct_hgnc_not_duplicated_hgnc_col <- subset(gct_hgnc_not_duplicated, select = -c(gene_id) )
rownames(gct_hgnc_not_duplicated) <- gct_hgnc_not_duplicated$gene_name
gct_hgnc_not_duplicated <- subset(gct_hgnc_not_duplicated, select = -c(gene_name, gene_id) )
#gct_hgnc_not_duplicated
#metadata <- read.csv(file = here("/home/mchopra/Documents/PhD-Year1/deconvolution/phenotype.csv"))
metadata <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/phenotype.csv", sep = "\t")
#metadata
colnames(metadata)[1] <- "SUBJID"
metadata$SUBJID <- as.character(metadata$SUBJID)
metadata$AGE <- as.character(metadata$AGE)
metadata$AGE <- case_when(
metadata$AGE %in% c("20-29") ~ "30-39",
metadata$AGE %in% c("30-39") ~ "30-39",
metadata$AGE %in% c("40-49") ~ "40-49",
metadata$AGE %in% c("50-59") ~ "50-59",
metadata$AGE %in% c("60-69") ~ "60-69",
metadata$AGE %in% c("70-79") ~ "70-79",
TRUE ~ metadata$AGE # Keep other values unchanged
)
#metadata
##RUN MCP COUNTER
deconvolution_mcp <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, "mcp_counter")
##
## >>> Running mcp_counter
#deconvolution_mcp
transposed = as.data.frame(t(deconvolution_mcp))
colnames(transposed) <- transposed[1,] ##when you are not specifying anything, you are considering all of the values. eg. [row:column] ...setting column name using the first row
#transposed
write.csv(transposed, "transposed_mcp.csv", row.names = TRUE)
data <- read.csv("transposed_mcp.csv")
data <- data[-1,]
#data[1:5][1:5]
colnames(data)[1] <- "SUBJID"
#data
merged_data <- inner_join(data, metadata, by = "SUBJID")
#merged_data
merged_data[,2:12] <-lapply(merged_data[, 2:12], as.numeric)
ggplot(merged_data, aes(x= AGE, y = T.cell)) +
geom_violin(aes(group = AGE), na.rm = TRUE) +
labs(x = "age group", y = "T-cell Score") +
ggtitle("Violin Plot in T cells")
#merged_data[1:5][1:5]
#y-axis column names
y_axis_columns <- colnames(merged_data)[2:12]
#list to store plots
plots <- list()
#loop through y axis columns and create violin plots
for (y_col in y_axis_columns) {
p <- ggplot(merged_data, aes(x = AGE, y = .data[[y_col]])) +
geom_violin() +
labs(x = "Age Group", y = y_col) +
ggtitle(paste("Violin Plot - Cellular Composition", y_col, "by Age Group"))
plots[[y_col]] <- p
}
mcp_plots <- grid.arrange(grobs = plots, ncol = 3)
ggsave("combined_plots.jpg", mcp_plots, width = 12, height = 8, units = "in", dpi = 300)
#gct_hgnc_not_duplicated[1:5][1:5]
##quantiseq
#deconvolute_xcell <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, method= "cibersort")
#deconvolute_xcell
transposed_xcell = as.data.frame(t(deconvolute_xcell)) colnames(transposed_xcell) <- transposed_xcell[1,] ##when you are not specifying anything, you are considering all of the values. eg. [row:column] …setting column name using the first row transposed_xcell <- transposed_xcell[-1,] transposed_xcell
write.csv(transposed_xcell, “transposed_xcell.csv”, row.names = TRUE ) ……
data_xcell <- read.csv(“transposed_xcell.csv”)
colnames(data_xcell)[1] <- “SUBJID”
data_xcell
…..
merged_data_xcell <- inner_join(data_xcell, metadata, by = “SUBJID”) merged_data_xcell
……. merged_data_xcell[,2:44] <- merged_data_xcell[, 2:44], as.numeric …. merged_data_xcell[1:5][1:5]
#y-axis column names y_axis_columns_xcell <- colnames(merged_data_xcell)[2:44]
#list to store plots plots <- list()
#loop through y axis columns and create violin plots for (y_col in y_axis_columns_xcell) { p <- ggplot(merged_data_xcell, aes(x = AGE, y = .data[[y_col]])) + geom_violin() + labs(x = “Age Group”, y = y_col) + ggtitle(paste(“Violin Plot - Cellular Composition”, y_col, “by Age Group”))
plots[[y_col]] <- p }
xcell_plots <- grid.arrange(grobs = plots, ncol = 3)
ggsave(“combined_plots_xcell.jpg”, mcp_plots, width = 12, height = 8, units = “in”, dpi = 300) …. deconvolution_timer <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, “timer”, indications = rep(“FBN1”, 24))