suppressPackageStartupMessages({
library(verification)
library(pROC)
library(discretefit)
library(dplyr)
library(ggplot2)
library(tidyr)
library(tidyverse)
library(corrplot)
library(data.table)
library(readr)
library(kableExtra)
library(glmnet)
library(here)
library(kableExtra)
library(formattable)
library(colorRamp2)
library(ComplexHeatmap)
library(GetoptLong)
library(ellipse)
})
DATASET <- c("2020_dataset", "2021_dataset")
TIMEPOINTS <- c(0, 1, 3, 14)
dataFile <- "combined_dataset2020_2021.csv"
META_COLS <- c("specimen_id", "subject_id", "timepoint", "dataset",
"biological_sex", "infancy_vac", "age_at_boost")
ABTITER_COLS <- c("IgG_PT",'IgG_FHA','IgG_PRN','IgG1_PT',
'IgG1_FHA', 'IgG4_PT', 'IgG4_FHA')
RNA_COLS <- c("CCL3", "IL6", "NFKBIA")
CELL_COLS <- c("Monocytes", "CD4Tcells")
DEMOGRAPHY_COLS <- c("age_at_boost", "biological_sex", "infancy_vac")
TASK_COLS <- c("Monocytes_D1", "CCL3_D3", "IgG_PT_D14")
TASKS_BASELINES <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0")
BASE_COLS <- c("Monocytes_D0", "CD4Tcells_D0", "IgG_PT_D0", 'IgG_FHA_D0',
'IgG_PRN_D0','IgG1_PT_D0', 'IgG1_FHA_D0', 'IgG4_PT_D0',
'IgG4_FHA_D0', "CCL3_D0", "IL6_D0", "NFKBIA_D0")
LOG_TRANS_COLS <- c("CCL3_D0", "IgG_PT_D0","IgG_FHA_D0",
'IgG_PRN_D0','IgG1_PT_D0','IgG1_FHA_D0',
'IgG4_PT_D0','IgG4_FHA_D0', "IL6_D0",
"NFKBIA_D0", "CCL3_D3", "IgG_PT_D14")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
df_source <- readRDS(here("./data/master_normalized_data_challenge2_train.RDS"))
metaDf <- data.frame(df_source[["subject_specimen"]])
metaDf["age_at_boost"] <- as.numeric(round(difftime(metaDf$date_of_boost, metaDf$year_of_birth,units="weeks")/52, 2))
metaDf <- metaDf[, META_COLS]
abtiterDf <- data.frame(df_source[["plasma_antibody_levels_wide"]])
abtiterDf <- data.frame(abtiterDf[, ABTITER_COLS])
colnames(abtiterDf) <- ABTITER_COLS
abtiterDf["specimen_id"] <- as.numeric(row.names(abtiterDf))
rnaDf <- data.frame(df_source[["pbmc_gene_expression_wide"]])
tasks_seq <- c('ENSG00000277632','ENSG00000136244','ENSG00000100906')
for (i in 1:length(tasks_seq)){
rnaDf <- data.frame(rnaDf %>% rename_at(vars(starts_with(tasks_seq[i])), ~RNA_COLS[i]))
}
rnaDf <- data.frame(rnaDf[, RNA_COLS])
colnames(rnaDf) <- RNA_COLS
rnaDf["specimen_id"] <- as.numeric(row.names(rnaDf))
cellDf <- data.frame(df_source[["pbmc_cell_frequency_wide"]])
cellDf <- data.frame(cellDf[, CELL_COLS])
colnames(cellDf) <- CELL_COLS
cellDf["specimen_id"] <- as.numeric(row.names(cellDf))
list_df <- list(metaDf, cellDf, abtiterDf, rnaDf)
df_merge <- list_df %>% reduce(full_join, by="specimen_id")
df_merge <- df_merge[df_merge$timepoint %in% TIMEPOINTS, ]
df_pivot <- df_merge[, names(df_merge)!="specimen_id"] %>%
pivot_wider(id_cols=c("subject_id", "dataset", "biological_sex",
"infancy_vac", "age_at_boost"),
names_from = timepoint,
values_from = all_of(c(CELL_COLS, RNA_COLS, ABTITER_COLS)),
names_sep = "_D")
df_pivot <- df_pivot[df_pivot$dataset %in% DATASET, ]
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
df <- df_pivot[, c("subject_id", DEMOGRAPHY_COLS, BASE_COLS, TASK_COLS)]
df$infancy_vac <- as.numeric(factor(df$infancy_vac, labels = c(1,2), levels = c("wP", "aP"), exclude = NA))
df$biological_sex <- as.numeric(factor(df$biological_sex, labels = c(1,2), exclude = NA))
for (ltCol in LOG_TRANS_COLS){
df[, paste("normalized", ltCol, sep = "_")] <- log2(df[, ltCol]+1)
}
targetX <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0")
targetY <- c("Monocytes_D1","CCL3_D3", "IgG_PT_D14",
"normalized_CCL3_D3", "normalized_IgG_PT_D14")
fc_cols <- paste(targetY, "FC", sep="_")
#
ranked_cols <- paste(c("age_at_boost", targetX, targetY, fc_cols), "Rank", sep="_")
rankingFunction <- function(x) {
as.numeric(rank(-x, ties.method = "average", na.last = "keep"))
}
targetX <- c("Monocytes_D0", "CCL3_D0", "IgG_PT_D0",
"normalized_CCL3_D0", "normalized_IgG_PT_D0")
targetY <- c("Monocytes_D1","CCL3_D3", "IgG_PT_D14",
"normalized_CCL3_D3", "normalized_IgG_PT_D14")
df[,"Monocytes_D1_FC"] <- df[, "Monocytes_D1"] / df[, "Monocytes_D0"]
df[,"CCL3_D3_FC"] <- df[, "CCL3_D3"] / df[, "CCL3_D0"]
df[,"IgG_PT_D14_FC"] <- df[, "IgG_PT_D14"] / df[, "IgG_PT_D0"]
df[,"normalized_CCL3_D3_FC"] <- df[, "normalized_CCL3_D3"] / df[, "normalized_CCL3_D0"]
df[,"normalized_IgG_PT_D14_FC"] <- df[, "normalized_IgG_PT_D14"] / df[, "normalized_IgG_PT_D0"]
# df[, ranked_cols] <- apply(df[, c("age_at_boost", targetX, targetY, fc_cols)],
# 2, rankingFunction)
df <- data.frame(df)
lassoFunction <- function(df, corrDf, pvalDf, model, y, x){
for (j in 1:length(y)){
subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]))])
predVals <- c()
trueVals<- c()
###print(c(y[j], sub("normalized_", "", y[j])))
for (i in 1:nrow(subDf)){
oneout <- subDf[-c(i), ]
suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]),
oneout[,y[j]], family='gaussian',
alpha=1, nfolds=nrow(oneout)-1))
predictor <- data.frame(subDf[i, x])
colnames(predictor) <- x
preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')
predVals <- c(predVals, preds)
trueVals <- c(trueVals, subDf[i, sub("normalized_", "", y[j])])
}
trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
predVals <- rank(predVals, ties.method = "average", na.last = "keep")
correlations <- cor.test(trueVals, predVals, method="spearman")
pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
}
return(list(corrDf, pvalDf))
}
lassoFunction_FC <- function(df, corrDf, pvalDf, model, y, x){
for (j in 1:length(y)){
pattern <- sub("_D.*", "", y[j])
denom <- str_subset(x, pattern)
subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]), denom, sub("normalized_", "", denom))])
predVals <- c()
trueVals<- c()
predVals_FC <- c()
trueVals_FC <- c()
###print(c(y[j], denom))
for (i in 1:nrow(subDf)){
oneout <- subDf[-c(i), ]
suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), oneout[,y[j]],
family='gaussian', alpha=1, nfolds=nrow(oneout)-1))
predictor <- data.frame(subDf[i, x])
colnames(predictor) <- x
preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')
predVals <- c(predVals, preds)
trueVals <- c(trueVals, subDf[i, sub("normalized_", "", y[j])])
predVals_FC <- c(predVals_FC, preds/subDf[i, denom])
trueVals_FC <- c(trueVals_FC, subDf[i, sub("normalized_", "", y[j])]/subDf[i, sub("normalized_", "", denom)])
}
trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
predVals <- rank(predVals, ties.method = "average", na.last = "keep")
correlations <- cor.test(trueVals, predVals, method="spearman")
pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
trueVals_FC <- rank(trueVals_FC, ties.method = "average", na.last = "keep")
predVals_FC <- rank(predVals_FC, ties.method = "average", na.last = "keep")
correlations_FC <- cor.test(trueVals_FC, predVals_FC, method="spearman")
pvalDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$p.value)
corrDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$estimate)
}
return(list(corrDf, pvalDf))
}
lassoFunction_FC_normalized <- function(df, corrDf, pvalDf, model, y, x){
for (j in 1:length(y)){
subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]))])
predVals <- c()
trueVals<- c()
###print(c(y[j], sub("normalized_", "", y[j])))
for (i in 1:nrow(subDf)){
oneout <- subDf[-c(i), ]
suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]),
oneout[,y[j]], family='gaussian',
alpha=1, nfolds=nrow(oneout)-1))
predictor <- data.frame(subDf[i, x])
colnames(predictor) <- x
preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')
predVals <- c(predVals, preds)
trueVals <- c(trueVals, subDf[i, y[j]])
}
trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
predVals <- rank(predVals, ties.method = "average", na.last = "keep")
correlations <- cor.test(trueVals, predVals, method="spearman")
pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
}
return(list(corrDf, pvalDf))
}
The model is trained based on the foldchange and compared with
foldchange
set.seed(0)
corrDf <- data.frame()
pvalDf <- data.frame()
corrDf_FC <- data.frame()
pvalDf_FC <- data.frame()
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "3Baseline", y, x)
corrDf_FC <- data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "Demography_3Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction(df, corrDf_FC, pvalDf_FC, "Demography_12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
corrDf_bind <- rbind(corrDf, corrDf_FC)
pvalDf_bind <- rbind(pvalDf, pvalDf_FC)
colOrder <- c("Monocytes_D1", "Monocytes_D1_FC", "CCL3_D3",
"CCL3_D3_FC", "IgG_PT_D14", "IgG_PT_D14_FC")
colnames(corrDf_bind)[colnames(corrDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_14Baseline"] <- "Demography + 14 Baseline"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_14 Baseline"] <- "Demography + 14 Baseline"
corrDf_bind <- t(corrDf_bind)[, colOrder]
pvalDf_bind <- t(pvalDf_bind)[, colOrder]
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
# png(here(paste("./results/", "/LASSO_corelationHeatmap_predictByFoldchange.png", sep = "")),
# width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf_bind)), na.rm = T)
corrplot(as.matrix(corrDf_bind),
tl.col = 'black',
addCoef.col = 1,
# mar=c(0,0,0,3),
number.cex = 1.2, tl.srt = 60,
p.mat = as.matrix(pvalDf_bind),
sig.level = 0.05,
col.lim = c(-maxCorr, maxCorr),
cl.ratio = 0.2,
col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()

Model for fold changea re being compared to the normalized data
set.seed(0)
corrDf <- data.frame()
pvalDf <- data.frame()
corrDf_FC <- data.frame()
pvalDf_FC <- data.frame()
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "3Baseline", y, x)
corrDf_FC <- data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "Demography_3Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction(df, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1_FC", "normalized_CCL3_D3_FC", "normalized_IgG_PT_D14_FC")
res <- lassoFunction_FC_normalized(df, corrDf_FC, pvalDf_FC, "Demography_12Baseline", y, x)
corrDf_FC <-data.frame(res[[1]])
pvalDf_FC <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
corrDf_bind <- rbind(corrDf, corrDf_FC)
pvalDf_bind <- rbind(pvalDf, pvalDf_FC)
colOrder <- c("Monocytes_D1", "Monocytes_D1_FC", "CCL3_D3",
"CCL3_D3_FC", "IgG_PT_D14", "IgG_PT_D14_FC")
colnames(corrDf_bind)[colnames(corrDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Demography_14Baseline"] <- "Demography + 14 Baseline"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="X14Baseline"] <- "14 Baseline"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Demography_14 Baseline"] <- "Demography + 14 Baseline"
corrDf_bind <- t(corrDf_bind)[, colOrder]
pvalDf_bind <- t(pvalDf_bind)[, colOrder]
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf_bind)[colnames(corrDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
#
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf_bind)[colnames(pvalDf_bind)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
# png(here(paste("./results/", "/LASSO_corelationHeatmap_predictByFoldchange.png", sep = "")),
# width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf_bind)), na.rm = T)
corrplot(as.matrix(corrDf_bind),
tl.col = 'black',
addCoef.col = 1,
# mar=c(0,0,0,3),
number.cex = 1.2, tl.srt = 60,
p.mat = as.matrix(pvalDf_bind),
sig.level = 0.05,
col.lim = c(-maxCorr, maxCorr),
cl.ratio = 0.2,
col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()

the model is trained based on normalized task and the foldchange
been estimated and result been compared with foldchange
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
set.seed(0)
df1 <- data.frame(df_pivot)
df1$infancy_vac <- as.numeric(factor(df1$infancy_vac, labels = c(1,2),
levels = c("wP", "aP"), exclude = NA))
df1$biological_sex <- as.numeric(factor(df1$biological_sex,
labels = c(1,2), exclude = NA))
for (ltCol in LOG_TRANS_COLS){
df1[, paste("normalized", ltCol, sep = "_")] <- log2(df1[, ltCol]+1)
}
corrDf <- data.frame()
pvalDf <- data.frame()
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
colOrder <- c("IgG_PT_D14", "IgG_PT_D14_FC",
"Monocytes_D1", "Monocytes_D1_FC",
"CCL3_D3", "CCL3_D3_FC")
colnames(corrDf)[colnames(corrDf)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf)[colnames(corrDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf)[colnames(corrDf)=="X12Baseline"] <- "12 Baseline"
colnames(corrDf)[colnames(corrDf)=="Demography_14Baseline"] <- "Demography + 12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf)[colnames(pvalDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X12Baseline"] <- "12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="Demography_14 Baseline"] <- "Demography + 12 Baseline"
corrDf <- t(corrDf)[, colOrder]
pvalDf <- t(pvalDf)[, colOrder]
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
# png(here(paste("./results/", "/LASSO_corelationHeatmap_predictBytask.png", sep = "")),
# width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf)), na.rm = T)
corrplot(as.matrix(corrDf),
tl.col = 'black',
addCoef.col = 1,
number.cex = 1.2, tl.srt = 60,
p.mat = as.matrix(pvalDf),
sig.level = 0.05,
col.lim = c(-maxCorr, maxCorr),
cl.ratio = 0.2,
col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()

lassoFunction_FC <- function(df, corrDf, pvalDf, model, y, x){
for (j in 1:length(y)){
pattern <- sub("_D.*", "", y[j])
denom <- str_subset(x, pattern)
subDf <- na.omit(df[, c(x, y[j], sub("normalized_", "", y[j]), denom, sub("normalized_", "", denom))])
predVals <- c()
trueVals<- c()
predVals_FC <- c()
trueVals_FC <- c()
###print(c(y[j], denom))
for (i in 1:nrow(subDf)){
oneout <- subDf[-c(i), ]
suppressWarnings(fit_1<-cv.glmnet(x=as.matrix(oneout[, x]), oneout[,y[j]],
family='gaussian', alpha=1, nfolds=nrow(oneout)-1))
predictor <- data.frame(subDf[i, x])
colnames(predictor) <- x
preds<-predict(fit_1, newx=as.matrix(predictor), s='lambda.min')
predVals <- c(predVals, preds)
trueVals <- c(trueVals, subDf[i, y[j]])
predVals_FC <- c(predVals_FC, preds/subDf[i, denom])
trueVals_FC <- c(trueVals_FC, subDf[i, y[j]]/subDf[i, denom])
}
trueVals <- rank(trueVals, ties.method = "average", na.last = "keep")
predVals <- rank(predVals, ties.method = "average", na.last = "keep")
correlations <- cor.test(trueVals, predVals, method="spearman")
pvalDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$p.value)
corrDf[sub("normalized_", "", y[j]), model] <- as.matrix(correlations$estimate)
trueVals_FC <- rank(trueVals_FC, ties.method = "average", na.last = "keep")
predVals_FC <- rank(predVals_FC, ties.method = "average", na.last = "keep")
correlations_FC <- cor.test(trueVals_FC, predVals_FC, method="spearman")
pvalDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$p.value)
corrDf[paste(sub("normalized_", "", y[j]), "FC", sep="_"), model] <- as.matrix(correlations_FC$estimate)
}
return(list(corrDf, pvalDf))
}
the model is trained based on normalized task and the foldchange
been estimated and result been compared with foldchange of normalized
values
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
set.seed(0)
df1 <- data.frame(df_pivot)
df1$infancy_vac <- as.numeric(factor(df1$infancy_vac, labels = c(1,2),
levels = c("wP", "aP"), exclude = NA))
df1$biological_sex <- as.numeric(factor(df1$biological_sex,
labels = c(1,2), exclude = NA))
for (ltCol in LOG_TRANS_COLS){
df1[, paste("normalized", ltCol, sep = "_")] <- log2(df1[, ltCol]+1)
}
corrDf <- data.frame()
pvalDf <- data.frame()
x <- c("Monocytes_D0", "normalized_CCL3_D0", "normalized_IgG_PT_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "3Baseline", y, x)
corrDf <- data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "normalized_IgG_PT_D0", "normalized_CCL3_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_3Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG1_FHA_D0', 'normalized_IgG_FHA_D0',
'normalized_IgG_PRN_D0', 'normalized_IgG1_PT_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
x <- c("age_at_boost", "infancy_vac", "biological_sex",
"Monocytes_D0", "CD4Tcells_D0", "normalized_IgG_PT_D0",
'normalized_IgG_FHA_D0','normalized_IgG_PRN_D0',
'normalized_IgG1_PT_D0','normalized_IgG1_FHA_D0',
'normalized_IgG4_PT_D0', 'normalized_IgG4_FHA_D0',
"normalized_CCL3_D0", "normalized_IL6_D0", "normalized_NFKBIA_D0")
y <- c("Monocytes_D1", "normalized_CCL3_D3", "normalized_IgG_PT_D14")
res <- lassoFunction_FC(df1, corrDf, pvalDf, "Demography_12Baseline", y, x)
corrDf <-data.frame(res[[1]])
pvalDf <- data.frame(res[[2]])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
colOrder <- c("IgG_PT_D14", "IgG_PT_D14_FC",
"Monocytes_D1", "Monocytes_D1_FC",
"CCL3_D3", "CCL3_D3_FC")
colnames(corrDf)[colnames(corrDf)=="X3Baseline"] <- "3 Baseline "
colnames(corrDf)[colnames(corrDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(corrDf)[colnames(corrDf)=="X12Baseline"] <- "12 Baseline"
colnames(corrDf)[colnames(corrDf)=="Demography_12 Baseline"] <- "Demography + 12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X3Baseline"] <- "3 Baseline "
colnames(pvalDf)[colnames(pvalDf)=="Demography_3Baseline"] <- "Demography + 3 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="X12Baseline"] <- "12 Baseline"
colnames(pvalDf)[colnames(pvalDf)=="Demography_12 Baseline"] <- "Demography + 12 Baseline"
corrDf <- t(corrDf)[, colOrder]
pvalDf <- t(pvalDf)[, colOrder]
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(corrDf)[colnames(corrDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(corrDf)[colnames(corrDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(corrDf)[colnames(corrDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1"] <- "2.1) Monocytes_D1_Rank"
colnames(pvalDf)[colnames(pvalDf)=="Monocytes_D1_FC"] <- "2.2) Monocytes_D1_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3"] <- "3.1) CCL3_D3_Rank"
colnames(pvalDf)[colnames(pvalDf)=="CCL3_D3_FC"] <- "3.2) CCL3_D3_FC_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14"] <- "1.1) IgG_PT_D14_titer_Rank"
colnames(pvalDf)[colnames(pvalDf)=="IgG_PT_D14_FC"] <- "1.2) IgG_PT_D14_FC_Rank"
# png(here(paste("./results/", "/LASSO_corelationHeatmap_predictBytask_norm.png", sep = "")),
# width = 15, height = 10, units = 'in', res = 300 )
maxCorr <- max(abs(as.matrix(corrDf)), na.rm = T)
corrplot(as.matrix(corrDf),
tl.col = 'black',
addCoef.col = 1,
number.cex = 1.2, tl.srt = 60,
p.mat = as.matrix(pvalDf),
sig.level = 0.05,
col.lim = c(-maxCorr, maxCorr),
cl.ratio = 0.2,
col = colorRampPalette(c("blue", "white","red"))(100))
# dev.off()
