baseModel2021

suppressWarnings({library(omicade4)
library(mogsa)
library(RSpectra)
# library(lubridate)
library(glmnet)
library(dplyr)
library(cowplot)
library(ggplot2)
library(reshape2)
library(DBI)
library(RPostgreSQL)
library(tidyr)
library(dplyr)
library(readr)
library(tibble)
library(tidyverse)})

## Loading required package: ade4

## Loading required package: Matrix

## Loaded glmnet 4.1-7

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:mogsa':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:reshape2':
## 
##     smiths

## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack

## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ forcats   1.0.0     ✔ purrr     1.0.1
## ✔ lubridate 1.9.2     ✔ stringr   1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine()   masks mogsa::combine()
## ✖ tidyr::expand()    masks Matrix::expand()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ tidyr::pack()      masks Matrix::pack()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack()    masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Connecting to database

dsn_database = "cmipb_v4_0"   
dsn_hostname = "cmi-pb.lji.org"
dsn_port = "5432"                
dsn_uid = "cmipb"         
dsn_pwd = "b5mq62vW7JE2YUwq"


tryCatch({
    drv <- dbDriver("PostgreSQL")
    print("Connecting to Database…")
    connec <- dbConnect(drv,
                 dbname = dsn_database,
                 host = dsn_hostname,
                 port = dsn_port,
                 user = dsn_uid,
                 password = dsn_pwd)
    print("Database Connected!")
    },
    error=function(cond) {
            print("Unable to connect to Database.")
    })

## [1] "Connecting to Database…"
## [1] "Database Connected!"

# dbListTables(connec)

Make Meta data table

setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/")
metaDf <- dbGetQuery(connec, "SELECT DISTINCT sub.subject_id, sub.infancy_vac, sub.biological_sex, sub.year_of_birth, sub.ethnicity, sub.race, sam.specimen_type, sub.date_of_boost, sub.dataset FROM subject sub join specimen sam on sam.subject_id=sub.subject_id where sub.dataset='2021_dataset';")

metaDf$date_of_boost <- parse_date_time(metaDf$date_of_boost,"ymd")
metaDf$year_of_birth <- parse_date_time(metaDf$year_of_birth,"ymd")
metaDf$age_at_boost <- as.numeric(round(difftime(metaDf$date_of_boost, metaDf$year_of_birth, units="week")/52,2))

# write.csv(x=metaDf, "rasteh/results/metaData.csv", row.names = FALSE)
# metaDf1 <- metaDf %>% distinct()
# subj_df[,c("subject_id", "infancy_vac", "biological_sex", "year_of_birth", "date_of_boost")]

# metaDf <- merge()

Generate cell frequency table

cell_df <- dbGetQuery(connec, "SELECT sam.subject_id, sam.planned_day_relative_to_boost, exp.cell_type_name, exp.percent_live_cell FROM pbmc_cell_frequency exp join specimen sam on sam.specimen_id=exp.specimen_id join subject sub on sam.subject_id=sub.subject_id where sam.planned_day_relative_to_boost in (0, 1, 3, 14) and sub.dataset='2021_dataset';")
colnames(cell_df)

## [1] "subject_id"                    "planned_day_relative_to_boost"
## [3] "cell_type_name"                "percent_live_cell"

piv_cell_df<- cell_df %>% pivot_wider(names_from = c(cell_type_name, planned_day_relative_to_boost), values_from = percent_live_cell)
# piv_cell_df
# write.csv(x=piv_cell_df, "rasteh/results/cellFrequency.csv", row.names = FALSE)

Generate ab_titer table

# abtiter_df <- dbGetQuery(connec, "SELECT sam.subject_id, sam.planned_day_relative_to_boost, exp.isotype, exp.antigen, exp.lower_limit_of_detection FROM plasma_ab_titer exp join specimen sam on sam.specimen_id=exp.specimen_id join subject sub on sam.subject_id=sub.subject_id where sam.planned_day_relative_to_boost in (0, 1, 3, 14) and sub.dataset='2021_dataset';")

abtiter_df <- dbGetQuery(connec, "SELECT sam.subject_id, sam.planned_day_relative_to_boost, exp.* FROM plasma_ab_titer exp join specimen sam on sam.specimen_id=exp.specimen_id join subject sub on sam.subject_id=sub.subject_id where sam.planned_day_relative_to_boost in (0, 1, 3, 14) and sub.dataset='2021_dataset';")

colnames(abtiter_df)

##  [1] "subject_id"                    "planned_day_relative_to_boost"
##  [3] "specimen_id"                   "isotype"                      
##  [5] "is_antigen_specific"           "antigen"                      
##  [7] "MFI"                           "MFI_normalised"               
##  [9] "unit"                          "lower_limit_of_detection"

piv_abtiter_df <- abtiter_df[c("subject_id", "planned_day_relative_to_boost", "isotype", "antigen", "MFI_normalised")] %>% pivot_wider(names_from = c(isotype, antigen, planned_day_relative_to_boost), values_from = MFI_normalised)
# write.csv(x=piv_abtiter_df, "rasteh/results/abtiter.csv", row.names = FALSE)
# piv_abtiter_df

Generate RNAseq data

rna_df <- dbGetQuery(connec, "SELECT sam.subject_id, sam.planned_day_relative_to_boost, exp.versioned_ensembl_gene_id, exp.tpm FROM pbmc_gene_expression exp join specimen sam on sam.specimen_id=exp.specimen_id join subject sub on sam.subject_id=sub.subject_id where sam.planned_day_relative_to_boost in (0, 1, 3, 14) and sub.dataset='2021_dataset';")
colnames(rna_df)

## [1] "subject_id"                    "planned_day_relative_to_boost"
## [3] "versioned_ensembl_gene_id"     "tpm"

rna_df$versioned_ensembl_gene_id <- gsub("\\..*", "", rna_df$versioned_ensembl_gene_id)

piv_rna_df <- rna_df %>% pivot_wider(names_from = c(versioned_ensembl_gene_id, planned_day_relative_to_boost), values_from = tpm)

# write.csv(x=piv_rna_df, "rasteh/results/RNAseq.csv", row.names = FALSE)
# piv_rna_df

Combine datasets

# colnames(piv_abtiter_df)

df_list <- list(metaDf, piv_cell_df[c("subject_id", "Monocytes_0", "Monocytes_1")], piv_rna_df[c("subject_id", "ENSG00000277632_0", "ENSG00000277632_3")], piv_abtiter_df[c("subject_id", "IgG_FHA_0", "IgG_PRN_0", "IgG_PT_0", "IgG_PT_14")])

pre_df <- df_list %>% reduce(full_join, by='subject_id')

Defining column types

colnames(pre_df)

##  [1] "subject_id"        "infancy_vac"       "biological_sex"   
##  [4] "year_of_birth"     "ethnicity"         "race"             
##  [7] "specimen_type"     "date_of_boost"     "dataset"          
## [10] "age_at_boost"      "Monocytes_0"       "Monocytes_1"      
## [13] "ENSG00000277632_0" "ENSG00000277632_3" "IgG_FHA_0"        
## [16] "IgG_PRN_0"         "IgG_PT_0"          "IgG_PT_14"

colnames(pre_df) <- gsub("ENSG00000277632", "CCL3", colnames(pre_df))

catCols <- c("infancy_vac", "biological_sex", "ethnicity", "race")
numCols_0 <- c("age_at_boost", "Monocytes_0", "CCL3_0", "IgG_FHA_0", "IgG_PRN_0", "IgG_PT_0")

xCols <- c("infancy_vac", "biological_sex", "age_at_boost", "Monocytes_0", "CCL3_0", "IgG_FHA_0", "IgG_PRN_0", "IgG_PT_0")
yCols <- c("Monocytes_1", "CCL3_3", "IgG_PT_14")

df <- pre_df[c("subject_id", xCols, yCols)]

# write.csv(x=df, "rasteh/results/phase1.csv", row.names = FALSE)

plot Distribution

distPlot <- function(col, df){
  p <- 
    ggplot(df) +
    aes_string(col)

  if(is.numeric(df[[col]])) {
    p <- p + geom_density()

  } else {
    p <- p + geom_bar()
  }
  knitr::opts_chunk$set(fig.width=8, fig.height=6)
}

distPlots <- lapply(c(xCols, yCols), distPlot, df=df)

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/distribution_basic.png", width = 15, height = 10 , units = 'in', res = 300)
plot_grid(plotlist = distPlots)

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

# dev.off()

Normalized data

df$CCL3_0 <- log2(df$CCL3_0+1)
df$CCL3_3 <- log2(df$CCL3_3+1)

df$IgG_FHA_0 <- log2(df$IgG_FHA_0+1)
df$IgG_PRN_0 <- log2(df$IgG_PRN_0+1)
df$IgG_PT_0 <- log2(df$IgG_PT_0+1)
df$IgG_PT_14 <- log2(df$IgG_PT_14+1)

distPlots <- lapply(c(xCols, yCols), distPlot, df=df)


# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/distribution_normalized.png", width = 15, height = 10 , units = 'in', res = 300)
plot_grid(plotlist = distPlots)

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class list into a
## grob.

# dev.off()

violin plot

violinPlot <- function(df, type_var, value, variable, title, leg_title, n){
  ggplot(df, aes_string(x=type_var, y=value, fill= type_var)) +
    geom_violin(scale="count")+
    geom_boxplot(width=0.1,color="black",position = position_dodge(width =0.9))+
    geom_jitter(height = 0, width = 0.1)+
    # theme(axis.title.x = element_text (size=30, face = "bold"), 
    #       # axis.title.y = element_text(size=30, face = "bold"),
    #       axis.title.y = element_blank(),
    #       axis.text.x = element_text(colour = "Black", size = 30, face = "bold"),
    #       axis.text.y = element_text(colour = "Black", size = 30, face = "bold"),
    #       legend.title = element_text(colour = "Black", size = 30, face = "bold"),
    #       legend.text = element_text(colour = "Black", size = 30),
    #       legend.position = c(0.98,0.98), legend.justification = c(0.95,0.95),
    #       legend.key.height = unit(2, "lines"),
    #       strip.text.y.right = element_text(colour = "Black", size = 30, angle=-90, face = "bold"),
    #       plot.title = element_text(colour = "Black",
    #                                 size=50, family = "Courier", 
    #                                 face = "bold", hjust = 0.5))+
    facet_grid(vars(variable), scale="free")+
    ggtitle(title)+
    scale_fill_discrete(name = leg_title)
}

vacType <- melt(df[c("infancy_vac", "Monocytes_1", "CCL3_3", "IgG_PT_14")])

## Using infancy_vac as id variables

print(names(vacType))

## [1] "infancy_vac" "variable"    "value"

# png ("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_vacType.png", width = 15, height = 25 , units = 'in', res = 300)
knitr::opts_chunk$set(fig.width=8, fig.height=6)
violinPlot(na.omit(vacType), "infancy_vac", "value", "variable", "After Vaccination Results", "Vaccine Type", 3)

# dev.off()

sex <- na.omit(melt(df[c("biological_sex", "Monocytes_1", "CCL3_3", "IgG_PT_14")]))

## Using biological_sex as id variables

# sex_plot <- violinPlot(sex, "biological_sex", "value", "variable", "Biological Sex", "Gender", 3)

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_sex.png", width = 15, height = 25 , units = 'in', res = 300)
violinPlot(sex, "biological_sex", "value", "variable", "After Vaccination Results", "Gender", 3)

# dev.off()


vacType0 <- melt(df[c("infancy_vac", "Monocytes_0", "CCL3_0", "IgG_PT_0")])

## Using infancy_vac as id variables

print(names(vacType))

## [1] "infancy_vac" "variable"    "value"

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png ("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_vacType_day0.png", width = 15, height = 25 , units = 'in', res = 300)
violinPlot(na.omit(vacType0), "infancy_vac", "value", "variable", "Before Vaccination Results", "Vaccine Type", 3)

# dev.off()

sex0 <- na.omit(melt(df[c("biological_sex", "Monocytes_0", "CCL3_0", "IgG_PT_0")]))

## Using biological_sex as id variables

# sex_plot <- violinPlot(sex, "biological_sex", "value", "variable", "Biological Sex", "Gender", 3)

knitr::opts_chunk$set(fig.width=8, fig.height=6) 
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_sex_day0.png", width = 15, height = 25 , units = 'in', res = 300)
violinPlot(sex0, "biological_sex", "value", "variable", "Before Vaccination Results", "Gender", 3)

# dev.off()


# vacType_plot <- violinPlot(vacType, "infancy_vac", "value", "variable", "Infancy Vaccine", "Vaccine Type", 3)

# vacType_cell <- violinPlot(df, "infancy_vac", "Monocytes_1")
# vacType_gene <- violinPlot(df, "infancy_vac", "CCL3_3")
# vacType_abtiter <- violinPlot(df, "infancy_vac", "IgG_PT_14")
# 
# vacType_plot <-  do.call("grid.arrange", c(list(vacType_cell, vacType_gene, vacType_abtiter), ncol=3))
#

df$Monocytes_diff <- df$Monocytes_1 - df$Monocytes_0
df$CCL3_diff <- df$CCL3_3 - df$CCL3_0
df$IgG_PT_diff <- df$IgG_PT_14 - df$IgG_PT_0

vacType <- melt(df[c("infancy_vac", "Monocytes_diff", "CCL3_diff", "IgG_PT_diff")])

## Using infancy_vac as id variables

print(names(vacType))

## [1] "infancy_vac" "variable"    "value"

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png ("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_vacType_diff.png", width = 20, height = 25 , units = 'in', res = 300)
violinPlot(na.omit(vacType), "infancy_vac", "value", "variable", "Difference Between Before & After Vaccination", "Vaccine Type", 3)

# dev.off()

sex <- na.omit(melt(df[c("biological_sex", "Monocytes_diff", "CCL3_diff", "IgG_PT_diff")]))

## Using biological_sex as id variables

# sex_plot <- violinPlot(sex, "biological_sex", "value", "variable", "Biological Sex", "Gender", 3)

knitr::opts_chunk$set(fig.width=8, fig.height=6)  
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/violinPlot_sex_diff.png", width = 20, height = 25 , units = 'in', res = 300)
violinPlot(sex, "biological_sex", "value", "variable", "Difference Between Before & After Vaccination", "Gender", 3)

# dev.off()

Encoding data

df$infancy_vac <- factor(df$infancy_vac, labels = c(0,1), exclude = NA)
df$biological_sex <- factor(df$biological_sex, labels = c(0,1), exclude = NA)
df <- df[order(df$subject_id, decreasing = F),]

Heatmap of missing values

# """
# https://support.bioconductor.org/p/82447/
# https://jokergoo.github.io/ComplexHeatmap-reference/book/a-single-heatmap.html
# 
# https://jokergoo.github.io/ComplexHeatmap-reference/book/a-list-of-heatmaps.html#:~:text=To%20concatenate%20heatmaps%2C%20simply%20use%20%2B%20operator.&text=Under%20default%20mode%2C%20dendrograms%20from,concatenation%20is%20a%20HeatmapList%20object.
# """

library("ComplexHeatmap")

## Loading required package: grid

## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================

library("circlize")

## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================

library(RColorBrewer)
library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

library(dplyr)

demograph <- colnames(df)[colnames(df)%in%colnames(metaDf)][-c(1)]
rnaseq <- colnames(df)[grepl("CCL3", colnames(df))]
abtiter <- colnames(df)[colnames(df)%in%colnames(piv_abtiter_df)][-c(1)]
cellfreq <- colnames(df)[colnames(df)%in%colnames(piv_cell_df)][-c(1)]
class(df$CCL3_0)

## [1] "numeric"

df.t <- as.data.frame(t(df[,-1]))
dftRow <- rownames(df.t)
df.t <- as.data.frame(sapply(df.t, as.numeric))
rownames(df.t) <- dftRow
class(df.t$`89`)

## [1] "NULL"

colnames(df.t) <- df$subject_id

# category <- sapply(rownames(df.t), function(x) {
#   if(x %in% demograph){
#     assign(x, "Demography")
#   }else if(x %in% rnaseq){
#     assign(x, "RNA seq")
#   }else if(x %in% abtiter){
#     assign(x, "ab_titer")
#   }else{
#     assign(x, "Cell Frequency")
#   }
# })
# 
# categories <- factor(category, levels = c("Demograpy", "RNA seq", "ab_titer", "Cell Frequency"))

demographDf <- df.t %>% filter(row.names(df.t) %in% demograph)
demograph <- as.data.frame(sapply(demograph, as.numeric))

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

col1 <- colorRamp2(c(min(unlist(demographDf), na.rm =TRUE), max(unlist(demographDf), na.rm =TRUE)), c("yellow", "yellow"))
h1 <- Heatmap(demographDf, name = "Demography", na_col = "grey", 
              row_title = "Demography", 
              col = col1, cluster_rows = FALSE, 
              cluster_columns = FALSE,
              show_heatmap_legend = FALSE,
              row_names_gp = gpar(fontsize = 15),
              column_title_gp = gpar(fontsize = 15),
              row_title_gp = gpar(fontsize = 20))

## Warning: The input is a data frame-like object, convert it to a matrix.

rnaDf <- df.t %>% filter(row.names(df.t) %in% rnaseq)
col2 <- colorRamp2(c(min(unlist(rnaDf), na.rm =TRUE), max(unlist(rnaDf), na.rm =TRUE)), c("blue", "blue"))

h2 <- Heatmap(rnaDf, name = "RNA seq", na_col = "grey", 
              row_title = "RNA seq", 
              col = col2, cluster_rows = FALSE, 
              cluster_columns = FALSE,
              show_heatmap_legend = FALSE,
              # row_names_gp = gpar(fontsize = 15),
              # column_title_gp = gpar(fontsize = 15),
              # row_title_gp = gpar(fontsize = 20)
              )

## Warning: The input is a data frame-like object, convert it to a matrix.

abtiterDf <- df.t %>% filter(row.names(df.t) %in% abtiter)
col3 <- colorRamp2(c(min(unlist(abtiterDf), na.rm =TRUE), max(unlist(abtiterDf), na.rm =TRUE)), c("red", "red"))
h3 <- Heatmap(abtiterDf, name = "ab_titer", na_col = "grey",
              row_title = "ab_titer", 
              col = col3, cluster_rows = FALSE, 
              cluster_columns = FALSE,
              show_heatmap_legend = FALSE,
              # row_names_gp = gpar(fontsize = 15),
              # column_title_gp = gpar(fontsize = 15),
              # row_title_gp = gpar(fontsize = 25)
              )

## Warning: The input is a data frame-like object, convert it to a matrix.

cellfreqDf <- df.t %>% filter(row.names(df.t) %in% cellfreq)
col4 <- colorRamp2(c(min(unlist(cellfreqDf), na.rm =TRUE), max(unlist(cellfreqDf), na.rm =TRUE)), c("green", "green"))
h4 <- Heatmap(cellfreqDf, name = "Cell Frequency", na_col = "grey",
              row_title = "Cell Frequency", 
              col = col4, cluster_rows = FALSE, 
              cluster_columns = FALSE,
              show_heatmap_legend = FALSE,
              # row_names_gp = gpar(fontsize = 15),
              # column_title_gp = gpar(fontsize = 15),
              # row_title_gp = gpar(fontsize = 18)
              )

## Warning: The input is a data frame-like object, convert it to a matrix.

knitr::opts_chunk$set(fig.width=8, fig.height=6)
ht_list = h1 %v% h2 %v% h3 %v% h4

# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/heatmap.png", width = 15, height = 10 , units = 'in', res = 300)
draw(ht_list)

# dev.off()

scatter Plot

library(ggplot2)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:mogsa':
## 
##     combine

library(egg)
library(patchwork)

## 
## Attaching package: 'patchwork'

## The following object is masked from 'package:cowplot':
## 
##     align_plots

library("ggpmisc")

## Loading required package: ggpp

## 
## Attaching package: 'ggpp'

## The following object is masked from 'package:ggplot2':
## 
##     annotate

library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:egg':
## 
##     ggarrange

## The following object is masked from 'package:cowplot':
## 
##     get_legend

gList <- list()
names <- list()

for (y in yCols){
  for (x in xCols){
    
    filteredY <- na.omit(df[,c("subject_id", y)])
    filteredX <- na.omit(df[,c("subject_id", x)])
    int_x <- intersect(filteredY$subject_id, filteredX$subject_id)
    
    names <- c(names, paste0('Number of Samples: ', length(int_x)))
    
    missingPercent <- (dim(df)-length(int_x))/dim(df)*100
    
    p= ggplot(data = na.omit(df), aes_string(x, y)) + 
      geom_point()+
      labs(title = paste('Number of Samples: ', length(int_x), "\nMissing Values%: ", round(missingPercent, digits=1)))
    
    gList <- c(gList, list(p))
    
  }
}

print(dim(df))

## [1] 36 15

# gList <-gList %>% map(~.x + labs(x=NULL, y=NULL))
scaterPlot <- do.call("grid.arrange", c(gList, nrow = 3, ncol=8))

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# quartz.save("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/scatterPlot.png", width = 22, height = 12)
scaterPlot

## TableGrob (3 x 8) "arrange": 24 grobs
##     z     cells    name           grob
## 1   1 (1-1,1-1) arrange gtable[layout]
## 2   2 (1-1,2-2) arrange gtable[layout]
## 3   3 (1-1,3-3) arrange gtable[layout]
## 4   4 (1-1,4-4) arrange gtable[layout]
## 5   5 (1-1,5-5) arrange gtable[layout]
## 6   6 (1-1,6-6) arrange gtable[layout]
## 7   7 (1-1,7-7) arrange gtable[layout]
## 8   8 (1-1,8-8) arrange gtable[layout]
## 9   9 (2-2,1-1) arrange gtable[layout]
## 10 10 (2-2,2-2) arrange gtable[layout]
## 11 11 (2-2,3-3) arrange gtable[layout]
## 12 12 (2-2,4-4) arrange gtable[layout]
## 13 13 (2-2,5-5) arrange gtable[layout]
## 14 14 (2-2,6-6) arrange gtable[layout]
## 15 15 (2-2,7-7) arrange gtable[layout]
## 16 16 (2-2,8-8) arrange gtable[layout]
## 17 17 (3-3,1-1) arrange gtable[layout]
## 18 18 (3-3,2-2) arrange gtable[layout]
## 19 19 (3-3,3-3) arrange gtable[layout]
## 20 20 (3-3,4-4) arrange gtable[layout]
## 21 21 (3-3,5-5) arrange gtable[layout]
## 22 22 (3-3,6-6) arrange gtable[layout]
## 23 23 (3-3,7-7) arrange gtable[layout]
## 24 24 (3-3,8-8) arrange gtable[layout]

# dev.off()

Regression Line

dataDf <- df[, !names(df) %in% catCols]

gList <- list()
names <- list()

for (y in yCols){
  for (x in numCols_0){
    
    filteredY <- na.omit(dataDf[,c("subject_id", y)])
    filteredX <- na.omit(dataDf[,c("subject_id", x)])
    int_x <- intersect(filteredY$subject_id, filteredX$subject_id)
    
    names <- c(names, paste0('Number of Samples: ', length(int_x)))
    
    missingPercent <- (dim(dataDf)-length(int_x))/dim(dataDf)*100
    
    p= ggplot(data = na.omit(dataDf), aes_string(x, y)) + 
      geom_point()+ 
      stat_smooth(method = "lm")+
      labs(title = paste('Number of Samples: ', length(int_x), "\nMissing Values%: ", round(missingPercent, digits=1)))+
      stat_cor()
    
    gList <- c(gList, list(p))
    
  }
}

knitr::opts_chunk$set(fig.width=8, fig.height=6)
regPlot <- do.call("grid.arrange", c(gList,nrow = 3))

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# quartz.save("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/regressionPlot.png", width = 15, height = 10)
regPlot

## TableGrob (3 x 6) "arrange": 18 grobs
##     z     cells    name           grob
## 1   1 (1-1,1-1) arrange gtable[layout]
## 2   2 (1-1,2-2) arrange gtable[layout]
## 3   3 (1-1,3-3) arrange gtable[layout]
## 4   4 (1-1,4-4) arrange gtable[layout]
## 5   5 (1-1,5-5) arrange gtable[layout]
## 6   6 (1-1,6-6) arrange gtable[layout]
## 7   7 (2-2,1-1) arrange gtable[layout]
## 8   8 (2-2,2-2) arrange gtable[layout]
## 9   9 (2-2,3-3) arrange gtable[layout]
## 10 10 (2-2,4-4) arrange gtable[layout]
## 11 11 (2-2,5-5) arrange gtable[layout]
## 12 12 (2-2,6-6) arrange gtable[layout]
## 13 13 (3-3,1-1) arrange gtable[layout]
## 14 14 (3-3,2-2) arrange gtable[layout]
## 15 15 (3-3,3-3) arrange gtable[layout]
## 16 16 (3-3,4-4) arrange gtable[layout]
## 17 17 (3-3,5-5) arrange gtable[layout]
## 18 18 (3-3,6-6) arrange gtable[layout]

# dev.off()

Regression for predictors and difference between outcomes and their conterpart before vaccination

dataDf <- df[, !names(df) %in% catCols]

colnames(dataDf)

##  [1] "subject_id"     "age_at_boost"   "Monocytes_0"    "CCL3_0"        
##  [5] "IgG_FHA_0"      "IgG_PRN_0"      "IgG_PT_0"       "Monocytes_1"   
##  [9] "CCL3_3"         "IgG_PT_14"      "Monocytes_diff" "CCL3_diff"     
## [13] "IgG_PT_diff"

diffCols <- c("Monocytes_diff", "CCL3_diff", "IgG_PT_diff")

gList <- list()
names <- list()

for (y in diffCols){
  for (x in numCols_0){

    filteredY <- na.omit(dataDf[,c("subject_id", y)])
    filteredX <- na.omit(dataDf[,c("subject_id", x)])
    int_x <- intersect(filteredY$subject_id, filteredX$subject_id)

    names <- c(names, paste0('Number of Samples: ', length(int_x)))

    missingPercent <- (dim(dataDf)-length(int_x))/dim(dataDf)*100

    p= ggplot(data = na.omit(dataDf), aes_string(x, y)) +
      geom_point()+
      stat_smooth(method = "lm")+
      labs(title = paste('Number of Samples: ', length(int_x), "\nMissing Values%: ", round(missingPercent, digits=1)))+
      stat_cor()

    gList <- c(gList, list(p))

  }
}

knitr::opts_chunk$set(fig.width=8, fig.height=6)
regPlot <- do.call("grid.arrange", c(gList,nrow = 3))

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# quartz.save("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/regressionPlot_difference.png", width = 15, height = 10)
regPlot

## TableGrob (3 x 6) "arrange": 18 grobs
##     z     cells    name           grob
## 1   1 (1-1,1-1) arrange gtable[layout]
## 2   2 (1-1,2-2) arrange gtable[layout]
## 3   3 (1-1,3-3) arrange gtable[layout]
## 4   4 (1-1,4-4) arrange gtable[layout]
## 5   5 (1-1,5-5) arrange gtable[layout]
## 6   6 (1-1,6-6) arrange gtable[layout]
## 7   7 (2-2,1-1) arrange gtable[layout]
## 8   8 (2-2,2-2) arrange gtable[layout]
## 9   9 (2-2,3-3) arrange gtable[layout]
## 10 10 (2-2,4-4) arrange gtable[layout]
## 11 11 (2-2,5-5) arrange gtable[layout]
## 12 12 (2-2,6-6) arrange gtable[layout]
## 13 13 (3-3,1-1) arrange gtable[layout]
## 14 14 (3-3,2-2) arrange gtable[layout]
## 15 15 (3-3,3-3) arrange gtable[layout]
## 16 16 (3-3,4-4) arrange gtable[layout]
## 17 17 (3-3,5-5) arrange gtable[layout]
## 18 18 (3-3,6-6) arrange gtable[layout]

# dev.off()

Regression for predictors and outcoms counterparts at day 0

dataDf <- df[, !names(df) %in% catCols]

colnames(dataDf)

##  [1] "subject_id"     "age_at_boost"   "Monocytes_0"    "CCL3_0"        
##  [5] "IgG_FHA_0"      "IgG_PRN_0"      "IgG_PT_0"       "Monocytes_1"   
##  [9] "CCL3_3"         "IgG_PT_14"      "Monocytes_diff" "CCL3_diff"     
## [13] "IgG_PT_diff"

diffCols <- c("Monocytes_0", "CCL3_0", "IgG_PT_0")

gList <- list()
names <- list()

for (y in diffCols){
  for (x in numCols_0){

    filteredY <- na.omit(dataDf[,c("subject_id", y)])
    filteredX <- na.omit(dataDf[,c("subject_id", x)])
    int_x <- intersect(filteredY$subject_id, filteredX$subject_id)

    names <- c(names, paste0('Number of Samples: ', length(int_x)))

    missingPercent <- (dim(dataDf)-length(int_x))/dim(dataDf)*100

    p= ggplot(data = na.omit(dataDf), aes_string(x, y)) +
      geom_point()+
      stat_smooth(method = "lm")+
      labs(title = paste('Number of Samples: ', length(int_x), "\nMissing Values%: ", round(missingPercent, digits=1)))+
      stat_cor()

    gList <- c(gList, list(p))

  }
}

knitr::opts_chunk$set(fig.width=8, fig.height=6)
regPlot <- do.call("grid.arrange", c(gList,nrow = 3))

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# quartz.save("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/regressionPlot_beforeVaccination.png", width = 15, height = 10)
regPlot

## TableGrob (3 x 6) "arrange": 18 grobs
##     z     cells    name           grob
## 1   1 (1-1,1-1) arrange gtable[layout]
## 2   2 (1-1,2-2) arrange gtable[layout]
## 3   3 (1-1,3-3) arrange gtable[layout]
## 4   4 (1-1,4-4) arrange gtable[layout]
## 5   5 (1-1,5-5) arrange gtable[layout]
## 6   6 (1-1,6-6) arrange gtable[layout]
## 7   7 (2-2,1-1) arrange gtable[layout]
## 8   8 (2-2,2-2) arrange gtable[layout]
## 9   9 (2-2,3-3) arrange gtable[layout]
## 10 10 (2-2,4-4) arrange gtable[layout]
## 11 11 (2-2,5-5) arrange gtable[layout]
## 12 12 (2-2,6-6) arrange gtable[layout]
## 13 13 (3-3,1-1) arrange gtable[layout]
## 14 14 (3-3,2-2) arrange gtable[layout]
## 15 15 (3-3,3-3) arrange gtable[layout]
## 16 16 (3-3,4-4) arrange gtable[layout]
## 17 17 (3-3,5-5) arrange gtable[layout]
## 18 18 (3-3,6-6) arrange gtable[layout]

# dev.off()

correlation plot

library(corrplot)

## corrplot 0.92 loaded

sm <- cor( na.omit(df[,numCols_0]), method="spearman")
knitr::opts_chunk$set(fig.width=8, fig.height=6)

# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/correlatinHeatmap_spearman.png", width = 15, height = 10 , units = 'in', res = 300 )
corrplot(sm, method = 'circle', type = 'lower', insig='blank',  
         addCoef.col ='black', number.cex = 0.8, order = 'AOE', diag=FALSE)

# dev.off()


pm <- cor( na.omit(df[,numCols_0]))
knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/correlatinHeatmap_pearson.png", width = 15, height = 10 , units = 'in', res = 300 )
corrplot(pm, method = 'circle', type = 'lower', insig='blank', 
         addCoef.col ='black', number.cex = 0.8, order = 'AOE', diag=FALSE)

# dev.off()

library(RcmdrMisc)

## Loading required package: car

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:purrr':
## 
##     some

## The following object is masked from 'package:dplyr':
## 
##     recode

## Loading required package: sandwich

library('rstatix')

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

sm <- cor_mat( na.omit(df[,numCols_0]), method ="spearman")
print(sm)

## # A tibble: 6 × 7
##   rowname      age_at_boost Monocytes_0 CCL3_0 IgG_FHA_0 IgG_PRN_0 IgG_PT_0
## * <chr>               <dbl>       <dbl>  <dbl>     <dbl>     <dbl>    <dbl>
## 1 age_at_boost        1          -0.15   0.41     -0.013     0.006    0.18 
## 2 Monocytes_0        -0.15        1     -0.074    -0.46      0.072   -0.26 
## 3 CCL3_0              0.41       -0.074  1         0.03     -0.063    0.027
## 4 IgG_FHA_0          -0.013      -0.46   0.03      1        -0.11     0.18 
## 5 IgG_PRN_0           0.006       0.072 -0.063    -0.11      1        0    
## 6 IgG_PT_0            0.18       -0.26   0.027     0.18      0        1

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/correlatinHeatmap_spearman_pVal.png", width = 15, height = 10 , units = 'in', res = 300 )
sm %>% 
  cor_reorder() %>%
  pull_lower_triangle() %>%
  cor_plot(label = TRUE)

# dev.off()


pm <- cor_mat( na.omit(df[,numCols_0]), method ="pearson")
print(pm)

## # A tibble: 6 × 7
##   rowname      age_at_boost Monocytes_0  CCL3_0 IgG_FHA_0 IgG_PRN_0 IgG_PT_0
## * <chr>               <dbl>       <dbl>   <dbl>     <dbl>     <dbl>    <dbl>
## 1 age_at_boost        1          -0.053  0.14       0.08      0.083  -0.033 
## 2 Monocytes_0        -0.053       1      0.076     -0.42     -0.028  -0.23  
## 3 CCL3_0              0.14        0.076  1         -0.035    -0.08   -0.0033
## 4 IgG_FHA_0           0.08       -0.42  -0.035      1        -0.18    0.25  
## 5 IgG_PRN_0           0.083      -0.028 -0.08      -0.18      1      -0.061 
## 6 IgG_PT_0           -0.033      -0.23  -0.0033     0.25     -0.061   1

knitr::opts_chunk$set(fig.width=8, fig.height=6)
# png("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/phase1_results/correlatinHeatmap_pearson_pVal.png", width = 15, height = 10 , units = 'in', res = 300 )
pm%>% 
  cor_reorder() %>%
  pull_lower_triangle() %>%
  cor_plot(label = TRUE)

# dev.off()

baseModel2021

2023-05-31