deconv.knit

library(biomaRt)
library(immunedeconv)

## Loading required package: EPIC

library(corrplot)

## corrplot 0.92 loaded

library(forcats)
library(stringr)
library(here)

## here() starts at /home/mchopra/Documents/PhD-Year1/deconvolution

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:biomaRt':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(RColorBrewer)
library(CePa)

## 
## Attaching package: 'CePa'

## The following object is masked from 'package:tidyr':
## 
##     spread

library(ensembldb) ##also downloaded package EnsDb.Hsapiens.v86

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## Loading required package: GenomeInfoDb

## Loading required package: GenomicFeatures

## Loading required package: AnnotationDbi

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: AnnotationFilter

## 
## Attaching package: 'ensembldb'

## The following object is masked from 'package:dplyr':
## 
##     filter

## The following object is masked from 'package:stats':
## 
##     filter

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:BiocGenerics':
## 
##     combine

## The following object is masked from 'package:dplyr':
## 
##     combine

setwd("/home/mchopra/Documents/PhD-Year1/deconvolution")

annotate_genes <- function(df){

    df$hgnc_symbol <- df$gene
    mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl") #, host="uswest.ensembl.org")
    info <- getBM(attributes=c("hgnc_symbol",
                               "ensembl_gene_id_version",
                               "chromosome_name",
                               "start_position",
                               "end_position",
                               "strand",
                               "entrezgene_description"),
                  filters = c("hgnc_symbol"),
                  values = df$gene,
                  mart = mart,
                  useCache=FALSE)

    tmp <- merge(df, info, by="hgnc_symbol")
    tmp$strand <- gsub("-1", "-", tmp$strand)
    tmp$strand <- gsub("1", "+", tmp$strand)
    tmp <- tmp[!grepl("CHR", tmp$chromosome_name),]

    return(tmp)

}

convert_ensg_version_to_hgnc_df_rownames <- function(df, ensdb = EnsDb.Hsapiens.v86){
  library(AnnotationDbi)
  library(ensembldb)
  library(EnsDb.Hsapiens.v86)
  genes_base <- str_split_fixed(string = rownames(df), pattern = "\\.", n = 2)[,1]
  newnames_original <- suppressWarnings(mapIds(EnsDb.Hsapiens.v86,
    keys = genes_base,
    column = 'SYMBOL',
    keytype = 'GENEID'))
# keep ensg version of newnames is na or is duplicated
newnames <- ifelse(is.na(newnames_original) | duplicated(newnames_original),
    rownames(df), newnames_original)
rownames(df) <- newnames
return(df)
}

gct_data <- read.table("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/GTEx_aortic_tissues/gene_tpm_artery_aorta.gct", header = TRUE, sep = "\t", skip = 2, stringsAsFactors = FALSE)

#gct_data

colnames(gct_data) <- gsub("\\.", "-", colnames(gct_data))
colnames(gct_data) <- gsub("GTE", "GTEX", colnames(gct_data))
cols <- colnames(gct_data)[3:ncol(gct_data)]
test <- str_split_fixed(cols, "-", n = 5)[,1:2]
test <- paste("GTEX", str_split_fixed(cols, "-", n = 5)[,2], sep = "-")
#test

colnames(gct_data)[3:ncol(gct_data)] <- test

gct_data[1:5, 1:6]

##   id              Name       GTEX- GTEX-111YS GTEX-1122O GTEX-1128S
## 1  0 ENSG00000223972.5     DDX11L1      0.000      0.000     0.0000
## 2  1 ENSG00000227232.5      WASH7P      3.652      3.402     7.7330
## 3  2 ENSG00000278267.1   MIR6859-1      0.000      0.000     0.0000
## 4  3 ENSG00000243485.5 MIR1302-2HG      0.000      0.000     0.0659
## 5  4 ENSG00000237613.2     FAM138A      0.051      0.000     0.0000

colnames(gct_data)[3] <- "gene_name"
colnames(gct_data)[2] <- "gene_id"

gct_data <- gct_data[,-1] ##to delete column 1
#gct_data

gct_hgnc <- convert_ensg_version_to_hgnc_df_rownames(gct_data)

#gct_hgnc

gct_hgnc_not_duplicated <- gct_hgnc[!duplicated(gct_hgnc$gene_name),]
gct_hgnc_not_duplicated_hgnc_col <- subset(gct_hgnc_not_duplicated, select = -c(gene_id) )
rownames(gct_hgnc_not_duplicated) <- gct_hgnc_not_duplicated$gene_name
gct_hgnc_not_duplicated <- subset(gct_hgnc_not_duplicated, select = -c(gene_name, gene_id) )
#gct_hgnc_not_duplicated

#metadata <- read.csv(file = here("/home/mchopra/Documents/PhD-Year1/deconvolution/phenotype.csv"))
metadata <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/phenotype.csv", sep = "\t")
#metadata
colnames(metadata)[1] <- "SUBJID"
metadata$SUBJID <- as.character(metadata$SUBJID)
metadata$AGE <- as.character(metadata$AGE)
metadata$AGE <- case_when(
  metadata$AGE %in% c("20-29") ~ "30-39",
  metadata$AGE %in% c("30-39") ~ "30-39",
  metadata$AGE %in% c("40-49") ~ "40-49",
  metadata$AGE %in% c("50-59") ~ "50-59",
  metadata$AGE %in% c("60-69") ~ "60-69",
  metadata$AGE %in% c("70-79") ~ "70-79",
  TRUE ~ metadata$AGE  # Keep other values unchanged
)

#metadata

##RUN MCP COUNTER

deconvolution_mcp <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, "mcp_counter")

## 
## >>> Running mcp_counter

#deconvolution_mcp

transposed = as.data.frame(t(deconvolution_mcp))
colnames(transposed) <- transposed[1,] ##when you are not specifying anything, you are considering all of the values. eg. [row:column] ...setting column name using the first row
#transposed

write.csv(transposed, "transposed_mcp.csv", row.names = TRUE)

data <- read.csv("transposed_mcp.csv")
data <- data[-1,]
#data[1:5][1:5]

colnames(data)[1] <- "SUBJID"

#data

merged_data <- inner_join(data, metadata, by = "SUBJID")
#merged_data

merged_data[,2:12] <-lapply(merged_data[, 2:12], as.numeric)

ggplot(merged_data, aes(x= AGE, y = T.cell)) +
  geom_violin(aes(group = AGE), na.rm = TRUE) +
  labs(x = "age group", y = "T-cell Score") +
  ggtitle("Violin Plot in T cells")

#merged_data[1:5][1:5]

#y-axis column names
y_axis_columns <- colnames(merged_data)[2:12]

#list to store plots
plots <- list()

#loop through y axis columns and create violin plots
for (y_col in y_axis_columns) {
  p <- ggplot(merged_data, aes(x = AGE, y = .data[[y_col]])) +
    geom_violin() +
    labs(x = "Age Group", y = y_col) +
    ggtitle(paste("Violin Plot - Cellular Composition", y_col, "by Age Group"))
  
  plots[[y_col]] <- p
}

mcp_plots <- grid.arrange(grobs = plots, ncol = 3)

ggsave("combined_plots.jpg", mcp_plots, width = 12, height = 8, units = "in", dpi = 300)

#gct_hgnc_not_duplicated[1:5][1:5]

##quantiseq

#deconvolute_xcell <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, method= "cibersort")

#deconvolute_xcell

transposed_xcell = as.data.frame(t(deconvolute_xcell)) colnames(transposed_xcell) <- transposed_xcell[1,] ##when you are not specifying anything, you are considering all of the values. eg. [row:column] …setting column name using the first row transposed_xcell <- transposed_xcell[-1,] transposed_xcell

write.csv(transposed_xcell, “transposed_xcell.csv”, row.names = TRUE ) ……

data_xcell <- read.csv(“transposed_xcell.csv”)

colnames(data_xcell)[1] <- “SUBJID”

data_xcell

…..

merged_data_xcell <- inner_join(data_xcell, metadata, by = “SUBJID”) merged_data_xcell

……. merged_data_xcell[,2:44] <- merged_data_xcell[, 2:44], as.numeric …. merged_data_xcell[1:5][1:5]

#y-axis column names y_axis_columns_xcell <- colnames(merged_data_xcell)[2:44]

#list to store plots plots <- list()

#loop through y axis columns and create violin plots for (y_col in y_axis_columns_xcell) { p <- ggplot(merged_data_xcell, aes(x = AGE, y = .data[[y_col]])) + geom_violin() + labs(x = “Age Group”, y = y_col) + ggtitle(paste(“Violin Plot - Cellular Composition”, y_col, “by Age Group”))

plots[[y_col]] <- p }

xcell_plots <- grid.arrange(grobs = plots, ncol = 3)

ggsave(“combined_plots_xcell.jpg”, mcp_plots, width = 12, height = 8, units = “in”, dpi = 300) …. deconvolution_timer <- immunedeconv::deconvolute(gct_hgnc_not_duplicated, “timer”, indications = rep(“FBN1”, 24))