suppressPackageStartupMessages({
  library(verification)
  library(pROC)
  library(discretefit)
  library(dplyr)
  library(ggplot2)
  library(tidyr)
  library(tidyverse)
  library(corrplot)
  library(data.table)
  library(readr)
  library(kableExtra)
  library(glmnet)
  library(here)
  library(kableExtra)
  library(formattable)
  library(colorRamp2)
  library(ComplexHeatmap)
  library(GetoptLong)
  library(ellipse)
})
DATASET <- c("2020_dataset", "2021_dataset")
TIMEPOINTS <- c(0, 1, 3, 14)
dataFile <- "combined_dataset2020_2021.csv"

META_COLS <- c("specimen_id", "subject_id", "timepoint", "dataset", 
               "biological_sex", "infancy_vac", "age_at_boost")
ABTITER_COLS <- c("IgG_PT",'IgG_FHA','IgG_PRN','IgG1_PT',
                  'IgG1_FHA', 'IgG4_PT', 'IgG4_FHA')
RNA_COLS <- c("CCL3", "IL6", "NFKBIA")
CELL_COLS <- c("Monocytes", "CD4Tcells")

DEMOGRAPHY_COLS <- c("age_at_boost", "biological_sex", "infancy_vac")

TASK_COLS <- c("Monocytes_D1", "CCL3_D3", "IgG_PT_D14")
TASKS_BASELINES <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0")
BASE_COLS <- c("Monocytes_D0", "CD4Tcells_D0", "IgG_PT_D0", 'IgG_FHA_D0', 
               'IgG_PRN_D0','IgG1_PT_D0', 'IgG1_FHA_D0', 'IgG4_PT_D0', 
               'IgG4_FHA_D0', "CCL3_D0", "IL6_D0", "NFKBIA_D0")

LOG_TRANS_COLS <- c("CCL3_D0", "IgG_PT_D0","IgG_FHA_D0",
                    'IgG_PRN_D0','IgG1_PT_D0','IgG1_FHA_D0',
                    'IgG4_PT_D0','IgG4_FHA_D0', "IL6_D0", 
                    "NFKBIA_D0",  "CCL3_D3",  "IgG_PT_D14")
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 
df_source <- readRDS(here("./data/master_normalized_data_challenge2_train.RDS"))

metaDf <- data.frame(df_source[["subject_specimen"]])
metaDf["age_at_boost"] <- as.numeric(round(difftime(metaDf$date_of_boost, metaDf$year_of_birth,units="weeks")/52, 2))
metaDf <- metaDf[, META_COLS]

abtiterDf <- data.frame(df_source[["plasma_antibody_levels_wide"]])
abtiterDf <- data.frame(abtiterDf[, ABTITER_COLS])
colnames(abtiterDf) <- ABTITER_COLS
abtiterDf["specimen_id"] <- as.numeric(row.names(abtiterDf))

rnaDf <- data.frame(df_source[["pbmc_gene_expression_wide"]])
tasks_seq <- c('ENSG00000277632','ENSG00000136244','ENSG00000100906')
for (i in 1:length(tasks_seq)){
  rnaDf <- data.frame(rnaDf %>% rename_at(vars(starts_with(tasks_seq[i])), ~RNA_COLS[i]))
}
rnaDf <- data.frame(rnaDf[, RNA_COLS])
colnames(rnaDf) <- RNA_COLS
rnaDf["specimen_id"] <- as.numeric(row.names(rnaDf))

cellDf <- data.frame(df_source[["pbmc_cell_frequency_wide"]])
cellDf <- data.frame(cellDf[, CELL_COLS])
colnames(cellDf) <- CELL_COLS
cellDf["specimen_id"] <- as.numeric(row.names(cellDf))

list_df <- list(metaDf, cellDf, abtiterDf, rnaDf)
df_merge <- list_df %>% reduce(full_join, by="specimen_id")
df_merge <- df_merge[df_merge$timepoint %in% TIMEPOINTS, ]

df_pivot <- df_merge[, names(df_merge)!="specimen_id"] %>% 
  pivot_wider(id_cols=c("subject_id", "dataset", "biological_sex", 
                        "infancy_vac", "age_at_boost"), 
              names_from = timepoint, 
              values_from = all_of(c(CELL_COLS, RNA_COLS, ABTITER_COLS)), 
              names_sep = "_D")

df_pivot <- df_pivot[df_pivot$dataset %in% DATASET, ]
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

df <- df_pivot[, c("subject_id", DEMOGRAPHY_COLS, BASE_COLS, TASK_COLS)]

df$infancy_vac <- as.numeric(factor(df$infancy_vac, labels = c(1,2), levels = c("wP", "aP"), exclude = NA))
df$biological_sex <- as.numeric(factor(df$biological_sex, labels = c(1,2), exclude = NA))

for (ltCol in LOG_TRANS_COLS){
   df[, paste("normalized", ltCol, sep = "_")] <- log2(df[, ltCol]+1)
}

targetX <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0")
targetY <- c("Monocytes_D1","CCL3_D3", "IgG_PT_D14",
             "normalized_CCL3_D3", "normalized_IgG_PT_D14")

fc_cols <- paste(targetY, "FC", sep="_")
# 
ranked_cols <- paste(c("age_at_boost", targetX, targetY, fc_cols), "Rank", sep="_")


rankingFunction <- function(x) {
  as.numeric(rank(-x, ties.method = "average", na.last = "keep"))
}

targetX <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0",
             "normalized_CCL3_D0", "normalized_IgG_PT_D0")
targetY <- c("Monocytes_D1","CCL3_D3", "IgG_PT_D14",
             "normalized_CCL3_D3", "normalized_IgG_PT_D14")

df[,"Monocytes_D1_FC"] <- df[, "Monocytes_D1"] / df[, "Monocytes_D0"]
df[,"CCL3_D3_FC"] <- df[, "CCL3_D3"] / df[, "CCL3_D0"]
df[,"IgG_PT_D14_FC"] <- df[, "IgG_PT_D14"] / df[, "IgG_PT_D0"]

df[,"normalized_CCL3_D3_FC"] <- df[, "normalized_CCL3_D3"] / df[, "normalized_CCL3_D0"]
df[,"normalized_IgG_PT_D14_FC"] <- df[, "normalized_IgG_PT_D14"] / df[, "normalized_IgG_PT_D0"]

# df[, ranked_cols] <- apply(df[, c("age_at_boost", targetX, targetY, fc_cols)],
                           # 2, rankingFunction)
df <- data.frame(df)
lassoFunction <- function(df, corrDf, pvalDf, model, y, x){

  for (j in 1:length(y)){

    subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]))])
    predVals <- c()
    trueVals<- c()
    
    ###print(c(y[j], sub("normalized_", "", y[j])))
    for (i in 1:nrow(subDf)){
      oneout <- subDf[-c(i), ]

      suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), 
                                        oneout[,y[j]], family='gaussian', 
                                        alpha=1, nfolds=nrow(oneout)-1))
      predictor <- data.frame(subDf[i, x])
      colnames(predictor) <- x
      preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')

      predVals <- c(predVals, preds)
      trueVals <- c(trueVals, subDf[i, sub("normalized_", "", y[j])])
    }
    trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
    predVals <- rank(predVals, ties.method = "average", na.last = "keep")
    correlations <- cor.test(trueVals, predVals, method="spearman")

    pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
    corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
  }
  return(list(corrDf, pvalDf))
}


lassoFunction_FC <- function(df, corrDf, pvalDf, model, y, x){

  for (j in 1:length(y)){
    pattern <- sub("_D.*", "", y[j])
    denom <- str_subset(x, pattern)

    subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]), denom, sub("normalized_", "", denom))])
    predVals <- c()
    trueVals<- c()
    
    predVals_FC <- c()
    trueVals_FC <- c()
    
    ###print(c(y[j], denom))
    for (i in 1:nrow(subDf)){
      oneout <- subDf[-c(i), ]

      suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), oneout[,y[j]],
                                        family='gaussian', alpha=1, nfolds=nrow(oneout)-1))
      predictor <- data.frame(subDf[i, x])
      colnames(predictor) <- x
      preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')

      predVals <- c(predVals, preds)
      trueVals <- c(trueVals, subDf[i, sub("normalized_", "", y[j])])

      predVals_FC <- c(predVals_FC, preds/subDf[i, denom])
      trueVals_FC <- c(trueVals_FC, subDf[i, sub("normalized_", "", y[j])]/subDf[i, sub("normalized_", "", denom)])
      
    }
    trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
    predVals <- rank(predVals, ties.method = "average", na.last = "keep")
    correlations <- cor.test(trueVals, predVals, method="spearman")

    pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
    corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)

    trueVals_FC <- rank(trueVals_FC, ties.method = "average", na.last = "keep")
    predVals_FC <- rank(predVals_FC, ties.method = "average", na.last = "keep")
    correlations_FC <- cor.test(trueVals_FC, predVals_FC, method="spearman")

    pvalDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$p.value)
    corrDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$estimate)
  }
  return(list(corrDf, pvalDf))
}


lassoFunction_FC_normalized <- function(df, corrDf, pvalDf, model, y, x){

  for (j in 1:length(y)){

    subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]))])
    predVals <- c()
    trueVals<- c()
    
    ###print(c(y[j], sub("normalized_", "", y[j])))
    for (i in 1:nrow(subDf)){
      oneout <- subDf[-c(i), ]

      suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), 
                                        oneout[,y[j]], family='gaussian', 
                                        alpha=1, nfolds=nrow(oneout)-1))
      predictor <- data.frame(subDf[i, x])
      colnames(predictor) <- x
      preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')

      predVals <- c(predVals, preds)
      trueVals <- c(trueVals, subDf[i, y[j]])
    }
    trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
    predVals <- rank(predVals, ties.method = "average", na.last = "keep")
    correlations <- cor.test(trueVals, predVals, method="spearman")

    pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
    corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
  }
  return(list(corrDf, pvalDf))
}

The model is trained based on the foldchange and compared with foldchange

set.seed(0)
corrDf <- data.frame()
pvalDf <- data.frame()

corrDf_FC <- data.frame()
pvalDf_FC <- data.frame()

x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "3Baseline", y, x)
corrDf_FC <- data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "Demography_3Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "Demography_12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

corrDf_bind <- rbind(corrDf, corrDf_FC)
pvalDf_bind <- rbind(pvalDf, pvalDf_FC)

colOrder <- c("Monocytes_D1", "Monocytes_D1_FC", "CCL3_D3",
              "CCL3_D3_FC", "IgG_PT_D14", "IgG_PT_D14_FC")

colnames(corrDf_bind)[colnames(corrDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_14Baseline"] <- "Demography + 14 Baseline"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_14 Baseline"] <- "Demography + 14 Baseline"

corrDf_bind <- t(corrDf_bind)[, colOrder]
pvalDf_bind <- t(pvalDf_bind)[, colOrder]
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"


# png(here(paste("./results/",  "/LASSO_corelationHeatmap_predictByFoldchange.png", sep = "")),
#     width = 15, height = 10, units = 'in', res = 300 )

maxCorr <- max(abs(as.matrix(corrDf_bind)), na.rm = T)
corrplot(as.matrix(corrDf_bind),
         tl.col = 'black',
         addCoef.col = 1,
         # mar=c(0,0,0,3),
         number.cex = 1.2, tl.srt = 60,
         p.mat = as.matrix(pvalDf_bind),
         sig.level = 0.05,
         col.lim = c(-maxCorr, maxCorr),
         cl.ratio = 0.2,
         col = colorRampPalette(c("blue", "white","red"))(100))

# dev.off()

Model for fold changea re being compared to the normalized data

set.seed(0)
corrDf <- data.frame()
pvalDf <- data.frame()

corrDf_FC <- data.frame()
pvalDf_FC <- data.frame()

x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "3Baseline", y, x)
corrDf_FC <- data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "Demography_3Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction(df, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")

res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "Demography_12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

corrDf_bind <- rbind(corrDf, corrDf_FC)
pvalDf_bind <- rbind(pvalDf, pvalDf_FC)

colOrder <- c("Monocytes_D1", "Monocytes_D1_FC", "CCL3_D3",
              "CCL3_D3_FC", "IgG_PT_D14", "IgG_PT_D14_FC")

colnames(corrDf_bind)[colnames(corrDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_14Baseline"] <- "Demography + 14 Baseline"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_14 Baseline"] <- "Demography + 14 Baseline"

corrDf_bind <- t(corrDf_bind)[, colOrder]
pvalDf_bind <- t(pvalDf_bind)[, colOrder]
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"


# png(here(paste("./results/",  "/LASSO_corelationHeatmap_predictByFoldchange.png", sep = "")),
#     width = 15, height = 10, units = 'in', res = 300 )

maxCorr <- max(abs(as.matrix(corrDf_bind)), na.rm = T)
corrplot(as.matrix(corrDf_bind),
         tl.col = 'black',
         addCoef.col = 1,
         # mar=c(0,0,0,3),
         number.cex = 1.2, tl.srt = 60,
         p.mat = as.matrix(pvalDf_bind),
         sig.level = 0.05,
         col.lim = c(-maxCorr, maxCorr),
         cl.ratio = 0.2,
         col = colorRampPalette(c("blue", "white","red"))(100))

# dev.off()

the model is trained based on normalized task and the foldchange been estimated and result been compared with foldchange

knitr::opts_chunk$set(warning = FALSE, message = FALSE)
set.seed(0)

df1 <- data.frame(df_pivot)

df1$infancy_vac <- as.numeric(factor(df1$infancy_vac, labels = c(1,2),
                                     levels = c("wP", "aP"), exclude = NA))
df1$biological_sex <- as.numeric(factor(df1$biological_sex,
                                        labels = c(1,2), exclude = NA))

for (ltCol in LOG_TRANS_COLS){
   df1[, paste("normalized", ltCol, sep = "_")] <- log2(df1[, ltCol]+1)
}

corrDf <- data.frame()
pvalDf <- data.frame()

x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

colOrder <- c("IgG_PT_D14", "IgG_PT_D14_FC",
              "Monocytes_D1", "Monocytes_D1_FC",
              "CCL3_D3", "CCL3_D3_FC")

colnames(corrDf)[colnames(corrDf)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf)[colnames(corrDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf)[colnames(corrDf)=="X12Baseline"] <- "12 Baseline"
colnames(corrDf)[colnames(corrDf)=="Demography_14Baseline"] <- "Demography + 12 Baseline"

colnames(pvalDf)[colnames(pvalDf)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf)[colnames(pvalDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X12Baseline"] <- "12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="Demography_14 Baseline"] <- "Demography + 12 Baseline"

corrDf <- t(corrDf)[, colOrder]
pvalDf <- t(pvalDf)[, colOrder]
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"

colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"

# png(here(paste("./results/",  "/LASSO_corelationHeatmap_predictBytask.png", sep = "")),
#     width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf)), na.rm = T)
corrplot(as.matrix(corrDf),
         tl.col = 'black',
         addCoef.col = 1,
         number.cex = 1.2, tl.srt = 60,
         p.mat = as.matrix(pvalDf),
         sig.level = 0.05,
         col.lim = c(-maxCorr, maxCorr),
         cl.ratio = 0.2,
         col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()

lassoFunction_FC <- function(df, corrDf, pvalDf, model, y, x){

  for (j in 1:length(y)){
    pattern <- sub("_D.*", "", y[j])
    denom <- str_subset(x, pattern)

    subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]), denom, sub("normalized_", "", denom))])
    predVals <- c()
    trueVals<- c()
    
    predVals_FC <- c()
    trueVals_FC <- c()
    
    ###print(c(y[j], denom))
    for (i in 1:nrow(subDf)){
      oneout <- subDf[-c(i), ]

      suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), oneout[,y[j]],
                                        family='gaussian', alpha=1, nfolds=nrow(oneout)-1))
      predictor <- data.frame(subDf[i, x])
      colnames(predictor) <- x
      preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')

      predVals <- c(predVals, preds)
      trueVals <- c(trueVals, subDf[i, y[j]])

      predVals_FC <- c(predVals_FC, preds/subDf[i, denom])
      trueVals_FC <- c(trueVals_FC, subDf[i, y[j]]/subDf[i, denom])
      
    }
    trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
    predVals <- rank(predVals, ties.method = "average", na.last = "keep")
    correlations <- cor.test(trueVals, predVals, method="spearman")

    pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
    corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)

    trueVals_FC <- rank(trueVals_FC, ties.method = "average", na.last = "keep")
    predVals_FC <- rank(predVals_FC, ties.method = "average", na.last = "keep")
    correlations_FC <- cor.test(trueVals_FC, predVals_FC, method="spearman")

    pvalDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$p.value)
    corrDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$estimate)
  }
  return(list(corrDf, pvalDf))
}

the model is trained based on normalized task and the foldchange been estimated and result been compared with foldchange of normalized values

knitr::opts_chunk$set(warning = FALSE, message = FALSE)
set.seed(0)

df1 <- data.frame(df_pivot)

df1$infancy_vac <- as.numeric(factor(df1$infancy_vac, labels = c(1,2),
                                     levels = c("wP", "aP"), exclude = NA))
df1$biological_sex <- as.numeric(factor(df1$biological_sex,
                                        labels = c(1,2), exclude = NA))

for (ltCol in LOG_TRANS_COLS){
   df1[, paste("normalized", ltCol, sep = "_")] <- log2(df1[, ltCol]+1)
}

corrDf <- data.frame()
pvalDf <- data.frame()

x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
       'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])


x <- c("age_at_boost", "infancy_vac", "biological_sex",
       "Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
       'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
       'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
       'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
       "normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")

res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

colOrder <- c("IgG_PT_D14", "IgG_PT_D14_FC",
              "Monocytes_D1", "Monocytes_D1_FC",
              "CCL3_D3", "CCL3_D3_FC")

colnames(corrDf)[colnames(corrDf)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf)[colnames(corrDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf)[colnames(corrDf)=="X12Baseline"] <- "12 Baseline"
colnames(corrDf)[colnames(corrDf)=="Demography_12 Baseline"] <- "Demography + 12 Baseline"

colnames(pvalDf)[colnames(pvalDf)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf)[colnames(pvalDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X12Baseline"] <- "12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="Demography_12 Baseline"] <- "Demography + 12 Baseline"

corrDf <- t(corrDf)[, colOrder]
pvalDf <- t(pvalDf)[, colOrder]
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"

colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"

# png(here(paste("./results/",  "/LASSO_corelationHeatmap_predictBytask_norm.png", sep = "")),
#     width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf)), na.rm = T)
corrplot(as.matrix(corrDf),
         tl.col = 'black',
         addCoef.col = 1,
         number.cex = 1.2, tl.srt = 60,
         p.mat = as.matrix(pvalDf),
         sig.level = 0.05,
         col.lim = c(-maxCorr, maxCorr),
         cl.ratio = 0.2,
         col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()