library(omicade4)
## Loading required package: ade4
library(mogsa)
library(RSpectra)
# library(lubridate)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-7
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:mogsa':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cowplot)
library(ggplot2)
setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/main/cmi_pb_datasets/processed/harmonized")

# Read in metadata
meta.2020<-read.table('clinical_metadata.2020.tsv',sep='\t',header=TRUE,stringsAsFactors=TRUE,row.names=1)
meta.2021<-read.table('clinical_metadata.2021.tsv',sep='\t',header=TRUE,stringsAsFactors=TRUE,row.names=1)

files from imputed data is already normalised so we don’t need to normalise them

# imputed_dir is path to local drive where data is saved
setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/main/cmi_pb_datasets/processed/imputed")

# Import imputed datasets
rnaseq_baseline_mat_imputed_20 <- read.csv('rnaseq_baseline_mat_imputed_20_051022.csv',row.names=1)
cytof_baseline_mat_imputed_20 <- read.csv('cytof_baseline_mat_imputed_20_051022.csv',row.names=1)
olink_baseline_mat_imputed_20 <- read.csv('olink_baseline_mat_imputed_20_051022.csv',row.names=1)
abtiters_baseline_mat_imputed_20 <- read.csv('abtiters_baseline_mat_imputed_20_051022.csv',row.names=1)

rnaseq_baseline_mat_imputed_21 <- read.csv('rnaseq_baseline_mat_imputed_21_051022.csv',row.names=1)
cytof_baseline_mat_imputed_21 <- read.csv('cytof_baseline_mat_imputed_21_051022.csv',row.names=1)
olink_baseline_mat_imputed_21 <- read.csv('olink_baseline_mat_imputed_21_051022.csv',row.names=1)
abtiters_baseline_mat_imputed_21 <- read.csv('abtiters_baseline_mat_imputed_21_051022.csv',row.names=1)

colnames(abtiters_baseline_mat_imputed_21)
##  [1] "IgG.FHA"     "IgG.PRN"     "IgG.PT"      "IgG1.FHA"    "IgG1.FIM2.3"
##  [6] "IgG1.PRN"    "IgG1.PT"     "IgG2.FHA"    "IgG2.FIM2.3" "IgG2.PRN"   
## [11] "IgG2.PT"     "IgG3.FHA"    "IgG3.FIM2.3" "IgG3.PRN"    "IgG3.PT"    
## [16] "IgG4.FHA"    "IgG4.FIM2.3" "IgG4.PRN"    "IgG4.PT"
tasks_seq<-c('ENSG00000277632','ENSG00000136244','ENSG00000100906','ENSG00000229807')
names(rnaseq_baseline_mat_imputed_20[tasks_seq])
## [1] "ENSG00000277632" "ENSG00000136244" "ENSG00000100906" "ENSG00000229807"
distPlot <- function(col, df){
  p <- 
    ggplot(df) +
    aes_string(col)

  if(is.numeric(df[[col]])) {
    p <- p + geom_density()

  } else {
    p <- p + geom_bar()
  }
}

distPlots <- lapply(tasks_seq, distPlot, df=rnaseq_baseline_mat_imputed_20)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot_grid(plotlist = distPlots)

# Get age at boost
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
## 
##     stamp
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
meta.2020$date_of_boost<-parse_date_time(meta.2020$date_of_boost,"ymd")
meta.2020$year_of_birth<-parse_date_time(meta.2020$year_of_birth,"ymd")
meta.2020$age_at_boost<- as.numeric(round(difftime(meta.2020$date_of_boost,
                                                    meta.2020$year_of_birth,units="weeks")/52,2))
meta.2021$date_of_boost<-parse_date_time(meta.2021$date_of_boost,"ymd")
meta.2021$year_of_birth<-parse_date_time(meta.2021$year_of_birth,"ymd")
meta.2021$age_at_boost<- as.numeric(round(difftime(meta.2021$date_of_boost,
                                                    meta.2021$year_of_birth,units="weeks")/52,2))

meta <- rbind(meta.2020[c("age_at_boost", "infancy_vac", "biological_sex")], meta.2021[c("age_at_boost", "infancy_vac", "biological_sex")])

meta$infancy_vac <- as.numeric(meta$infancy_vac)
meta$biological_sex <- as.numeric(meta$biological_sex)
colnames(meta)
## [1] "age_at_boost"   "infancy_vac"    "biological_sex"

add exposure table

library(DBI)
library(RPostgreSQL)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
library(dplyr)
library(readr)

dsn_database = "cmipb_v4_0"   
dsn_hostname = "cmi-pb.lji.org"
dsn_port = "5432"                
dsn_uid = "cmipb"         
dsn_pwd = "b5mq62vW7JE2YUwq"


tryCatch({
    drv <- dbDriver("PostgreSQL")
    print("Connecting to Database…")
    connec <- dbConnect(drv,
                 dbname = dsn_database,
                 host = dsn_hostname,
                 port = dsn_port,
                 user = dsn_uid,
                 password = dsn_pwd)
    print("Database Connected!")
    },
    error=function(cond) {
            print("Unable to connect to Database.")
    })
## [1] "Connecting to Database…"
## [1] "Database Connected!"
# dbListTables(connec)
library(tibble)
library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ purrr   1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine()   masks mogsa::combine()
## ✖ tidyr::expand()    masks Matrix::expand()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ tidyr::pack()      masks Matrix::pack()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack()    masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# library(lubridate)
exposDf <- dbGetQuery(connec, "SELECT * FROM immune_exposure")
ifelse(exposDf$event_start==exposDf$event_end,"Yes","No")
##   [1] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [13] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [25] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [37] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [49] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [61] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [73] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [85] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [97] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [109] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [121] "Yes"
exposDf %>% count(event_type)
##                 event_type  n
## 1               bronchitis  1
## 2                    covid 36
## 3                     dtap  1
## 4                      hib  2
## 5                      hpv 11
## 6                influenza 32
## 7    japanese encephalitis  1
## 8            meningococcal  8
## 9            mononucleosis  1
## 10            pneumococcal  1
## 11               pneumonia  3
## 12                   polio 10
## 13            strep throat  4
## 14 tick-borne encephalitis  1
## 15                 typhoid  1
## 16               varicella  7
## 17          whooping cough  1
exposDf$exposure_material_type <- as.character(as.numeric(as.factor(exposDf$exposure_material_type)))
exposDf$event_start <- as.character(as.numeric(as.factor(exposDf$event_start)))
# 
exposDf <- dplyr::filter(exposDf, event_type %in% c('covid', 'influenza'))

exposDf1 <- exposDf[, c('subject_id', 'event_type','exposure_material_type', 'event_start')] %>%
              pivot_wider( names_from = event_type,
              values_from = c(event_start, exposure_material_type),
              values_fn= toString,
              values_fill='Not known'
              )

colnames(exposDf1)
## [1] "subject_id"                       "event_start_influenza"           
## [3] "event_start_covid"                "exposure_material_type_influenza"
## [5] "exposure_material_type_covid"
typeof(exposDf1)
## [1] "list"
exposDf2 <- as.data.frame(exposDf1)
for (col in colnames(exposDf1)[2:length(colnames(exposDf1))]){
  
  exposDf2 <- exposDf1[!grepl(',', exposDf1[[col]]), ]
}

exposDf2 <- exposDf2%>% mutate(across(colnames(exposDf2), as.factor))
exposDf2 <- exposDf2%>% mutate(across(colnames(exposDf2), as.numeric))
exposDf2 
## # A tibble: 51 × 5
##    subject_id event_start_influenza event_start_covid exposure_material_type_i…¹
##         <dbl>                 <dbl>             <dbl>                      <dbl>
##  1          3                     1                 1                          1
##  2         38                     1                 1                          1
##  3         41                     3                 1                          1
##  4         46                     2                 1                          1
##  5         44                     2                 1                          1
##  6          2                     1                 3                          1
##  7          4                     2                 3                          1
##  8          5                     1                 3                          1
##  9          6                     1                 3                          1
## 10          7                     3                 3                          1
## # ℹ 41 more rows
## # ℹ abbreviated name: ¹​exposure_material_type_influenza
## # ℹ 1 more variable: exposure_material_type_covid <dbl>
# nullToNA <- function(x) {
#     x[sapply(x, is.null)] <- 'Not known'
#     return(x)
# }
# z=exposDf3[2][1]
# exposDf3 <- lapply(exposDf2, nullToNA)
# exposDf3.df <- t(do.call(rbind,lapply(exposDf3, rbind)))
# 
# # z= dim(exposDf3.df)[1]
# # print(dim(exposDf3.df))
# # print(exposDf3.df[[2]][2])
# # exposDf4 <- as.data.frame(matrix(unlist(exposDf3.df),nrow=dim(exposDf3.df)[1],byrow=TRUE)
# exposDf4 <- as.data.frame(matrix(unlist(exposDf3.df),nrow=dim(exposDf3.df)[1],byrow=FALSE))
# # z= typeof(exposDf4)
# colnames(exposDf4) <- names(exposDf3)
# #
# # z=exposDf3.df$subject_id
# #

All data from abtiter been log2 transformed. Why? to make them normally distributed since our glm uses gaussian as the distribution kernel?

# Get the y data
dataY = read.table('/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/yData_task_matrix.common_names.mfi_raw.tsv',sep='\t',header=TRUE,stringsAsFactors=TRUE,row.names=1)

dataY <- dataY[c("IgG.PT.day14", "ENSG00000277632.day3", "Monocytes.day1")]
colnames(dataY) <- c("IgG.PT.day14", "CCL3.day3", "Monocytes.day1")
dataY$IgG.PT.day14 <- log2(dataY[,'IgG.PT.day14']+1)
dataY$CCL3.day3 <- log2(dataY[,'CCL3.day3']+1)

dataY$subject_id <- rownames(dataY)
colnames(dataY)
## [1] "IgG.PT.day14"   "CCL3.day3"      "Monocytes.day1" "subject_id"
distPlot <- function(col, df){
  p <- 
    ggplot(df) +
    aes_string(col)

  if(is.numeric(df[[col]])) {
    p <- p + geom_density()

  } else {
    p <- p + geom_bar()
  }
}

typeof(dataY)
## [1] "list"
# 
distPlots <- lapply(c("IgG.PT.day14", "CCL3.day3", "Monocytes.day1"), distPlot, df=dataY)
plot_grid(plotlist = distPlots)
## Warning: Removed 4 rows containing non-finite values (`stat_density()`).
## Warning: Removed 22 rows containing non-finite values (`stat_density()`).
## Warning: Removed 41 rows containing non-finite values (`stat_density()`).

# df = read.table('/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/main/cmi_pb_datasets/processed/harmonized/task_matrix.common_names.mfi_normalised.tsv',sep='\t',header=TRUE,stringsAsFactors=TRUE,row.names=1)
# 
# distPlot <- function(col, df){
#   p <- 
#     ggplot(df) +
#     aes_string(col)
# 
#   if(is.numeric(df[[col]])) {
#     p <- p + geom_density()
# 
#   } else {
#     p <- p + geom_bar()
#   }
# }
# 
# typeof(df)
# colnames(df)
# # 
# distPlots <- lapply(c("IgG.PT_day14", "CCL3_day3", "Monocytes_day1"), distPlot, df=df)
# plot_grid(plotlist = distPlots)
colnames(rnaseq_baseline_mat_imputed_20)[which(names(rnaseq_baseline_mat_imputed_20) == "ENSG00000277632")] <- "CCL3"
colnames(rnaseq_baseline_mat_imputed_21)[which(names(rnaseq_baseline_mat_imputed_21) == "ENSG00000277632")] <- "CCL3"

rnaDf <- rbind(rnaseq_baseline_mat_imputed_20["CCL3"], rnaseq_baseline_mat_imputed_21["CCL3"])

abtiterDf <- rbind(abtiters_baseline_mat_imputed_20[c("IgG.FHA", "IgG.PRN", "IgG.PT")], abtiters_baseline_mat_imputed_21[c("IgG.FHA", "IgG.PRN", "IgG.PT")])

cytofDf <- rbind(cytof_baseline_mat_imputed_20["Monocytes"], cytof_baseline_mat_imputed_21["Monocytes"])

dataDf1 <- merge(rnaDf, abtiterDf, by='row.names', all=T)
colnames(dataDf1)[1] <- "subject_id" 

dataDf2 <- merge(cytofDf, meta, by='row.names', all=T)
colnames(dataDf2)[1] <- "subject_id"

dataX1 <- merge(dataDf1, dataDf2, by='subject_id', all=T)
# dataX <- merge(dataDf1, dataDf2, by='subject_id', all=T)
# colnames(dataX)[1] <- "subject_id"

dataX <- merge(dataX1, exposDf2, by='subject_id', all=T)
# 
dataDf <- merge(dataX, dataY, by='subject_id', all=T)
rownames(dataDf) <- dataDf$subject_id

dataDf
##    subject_id      CCL3    IgG.FHA    IgG.PRN     IgG.PT  Monocytes
## 1           1  5.333531 5.13138189 1.84893838 2.24397113  7.5260062
## 10         10  4.101398 0.14364571 0.09320618 0.04154169 13.4642547
## 11         11  4.542939 2.47376114 3.18404386 1.77698271 10.7905171
## 12         12        NA         NA         NA         NA         NA
## 13         13  3.399718 1.07740175 1.12798355 1.36806068  5.0304685
## 14         14        NA         NA         NA         NA         NA
## 15         15  4.835874 1.14771363 1.03058953 0.38100554 15.6373679
## 16         16        NA         NA         NA         NA         NA
## 17         17  4.512669 1.78634624 0.48701666 3.45553366 14.4574172
## 18         18  4.083384 3.51822961 1.56843058 1.68734290  0.9568001
## 19         19  4.459300 1.04292223 1.84192155 2.16297720 13.5202756
## 2           2        NA         NA         NA         NA         NA
## 20         20  3.963474 1.53996887 0.59107133 2.55022450 21.3991621
## 21         21  3.823138 3.93012107 1.68057253 2.77448207 15.2266453
## 22         22  4.462314 2.14113953 1.30342083 2.16393744 11.9355890
## 23         23  4.421223 1.68946851 0.43547450 0.40194862 11.7041207
## 24         24  3.106516 0.60815892 1.06497238 0.26992483 11.5201600
## 25         25  4.042732 0.34555009 0.80002675 0.74967184 13.4709985
## 26         26  4.735685 0.98270780 1.12156687 0.72685734  5.5320667
## 27         27  3.782618 1.68013778 0.84721352 0.77080873 13.8965682
## 28         28        NA         NA         NA         NA         NA
## 29         29  4.447249 2.58070095 2.87788355 0.99688351 16.0947648
## 3           3  4.599972 1.06795335 3.11313957 1.06789053 13.0245725
## 30         30        NA         NA         NA         NA         NA
## 31         31  4.693487 0.40767306 2.02769318 0.16545091  4.8790233
## 32         32  5.779549 0.80344564 1.30960268 1.40368966 13.5492381
## 33         33  5.311794 0.66955298 0.24197417 0.63824158  7.6518328
## 34         34        NA         NA         NA         NA         NA
## 35         35  5.434762 1.75928512 1.23573946 1.19302502 13.5420630
## 36         36  3.016496 2.00337580 2.33488694 1.04080942 11.2437986
## 37         37        NA         NA         NA         NA         NA
## 38         38  3.017922 0.81897225 0.56487492 0.77149215 13.7721006
## 39         39        NA         NA         NA         NA         NA
## 4           4  4.094574 1.03441003 2.73777402 1.60723380  5.4908380
## 40         40        NA         NA         NA         NA         NA
## 41         41        NA         NA         NA         NA         NA
## 42         42  5.230280 0.86883965 2.56022738 1.11654738 10.6095832
## 43         43  3.871351 1.01708739 1.30963816 1.00310977 13.6337391
## 44         44  4.518409 0.52901785 0.72866760 0.92865661 23.1032762
## 45         45        NA         NA         NA         NA         NA
## 46         46        NA         NA         NA         NA         NA
## 47         47  6.900831 1.56572050 0.54455774 2.65845646 10.1292397
## 48         48  6.731536 0.48194166 1.03140808 0.04154169 14.9597507
## 49         49        NA         NA         NA         NA         NA
## 5           5  4.916572 0.11694600 2.64806823 2.26243525 13.1258248
## 50         50  8.091139 0.61246324 0.32074600 0.50769248  9.3613078
## 51         51        NA         NA         NA         NA         NA
## 52         52  7.288949 0.11694600 1.56152075 1.97201547 28.5490352
## 53         53  9.867189 1.90423998 0.47059211 4.17711376 12.8294284
## 54         54        NA         NA         NA         NA         NA
## 55         55        NA         NA         NA         NA         NA
## 56         56        NA         NA         NA         NA         NA
## 57         57        NA         NA         NA         NA         NA
## 58         58        NA         NA         NA         NA         NA
## 59         59        NA         NA         NA         NA         NA
## 6           6  4.575554 0.46392532 0.12456085 0.27896948 22.7930283
## 60         60        NA         NA         NA         NA         NA
## 61         61 12.042340 3.54131321 0.15131429 1.00000000 12.8005992
## 62         62  6.937333 2.82093049 0.75122163 1.07602222 13.5692650
## 63         63  6.691953 1.95776540 0.11013376 1.79570532 15.3000000
## 64         64  6.760926 2.68882561 0.89982323 2.04705167 12.9000000
## 65         65  7.605079 3.31317657 0.64048964 0.80140188 15.8000000
## 66         66  5.042425 3.74662797 1.43985527 1.32512343 15.8000000
## 67         67  5.889230 2.00974701 0.35723800 0.70618673 34.6000000
## 68         68  5.853946 1.11683339 1.94255138 1.05459788 13.5000000
## 69         69  5.528321 1.65215623 1.21850657 1.07297696 13.1000000
## 7           7        NA         NA         NA         NA         NA
## 70         70  5.815166 3.35167692 1.65402214 0.47027882 32.3000000
## 71         71  5.987707 4.65325882 0.87273399 2.90659418 12.2000000
## 72         72  5.880881 0.28729447 2.12336100 2.08840338 19.9000000
## 73         73  4.529196 0.80435967 1.43128987 1.88327381 15.7000000
## 74         74  3.871745 0.62423924 1.60899360 0.52016941 36.3000000
## 75         75  5.678804 3.05443604 1.81011848 0.17736981 14.4559889
## 76         76  4.663800 4.53841221 0.11013376 3.53643892 18.0000000
## 77         77  8.217396 0.06711778 1.06772479 0.69081102 28.8000000
## 78         78  7.151839 0.10084120 0.25375330 0.99519368 29.0000000
## 79         79  6.969300 0.20652117 0.14721857 2.32256773 41.5000000
## 8           8        NA         NA         NA         NA         NA
## 80         80  7.178107 0.14117982 1.58147601 0.46841860 21.9000000
## 81         81 10.726589 0.07947224 1.13942560 1.81601157 34.9000000
## 82         82  9.413611 0.96199368 0.94934816 0.76723104 22.5000000
## 83         83  8.588209 1.00000000 2.03087458 1.26356779 32.4000000
## 84         84  7.593122 2.31136374 0.44903137 0.28053529 18.9000000
## 85         85  4.937862 3.32067659 0.92948200 0.33276350 19.7000000
## 86         86  5.362224 0.67511620 0.26301839 0.30403242 23.5000000
## 87         87  5.295650 3.21116798 0.96864376 1.07997538 22.9000000
## 88         88  5.567850 1.48284828 1.33113192 1.04544297 17.6000000
## 89         89  5.130601 0.04523958 0.13834498 0.06807137 15.7000000
## 9           9  4.238481 0.46042437 0.20839284 0.28783862  5.7229578
## 90         90  4.837136 0.74851784 1.63121255 0.33985853 20.6000000
## 91         91  4.140370 1.12281459 0.78254880 0.92749667 25.4000000
## 92         92  4.623047 0.04523958 1.03269446 0.23829881 40.6000000
## 93         93  4.616769 0.12543138 1.60575881 1.57747907 16.3000000
## 94         94  4.785446 0.08254440 1.26047137 2.65824315 26.2000000
## 95         95  4.878235 0.37016694 1.00000000 2.29792327 17.3000000
## 96         96  4.356355 0.19946349 0.92277461 0.76421696 34.2000000
##    age_at_boost infancy_vac biological_sex event_start_influenza
## 1         30.80           2              1                     4
## 10        34.68           2              1                     3
## 11        30.76           2              1                     2
## 12        34.68           2              2                     4
## 13        19.63           1              2                     1
## 14        23.70           2              2                     3
## 15        27.71           2              2                     3
## 16        29.66           2              1                     4
## 17        36.82           2              1                     3
## 18        19.73           1              1                     3
## 19        22.81           2              2                     3
## 2         51.25           2              1                     1
## 20        35.78           2              1                     3
## 21        33.77           2              2                     3
## 22        31.77           2              1                     4
## 23        25.82           2              1                     4
## 24        24.79           2              1                     2
## 25        28.80           2              1                     3
## 26        33.85           2              1                     4
## 27        19.80           1              1                     3
## 28        34.85           2              2                     2
## 29        19.80           1              2                     1
## 3         33.89           2              1                     1
## 30        28.84           2              1                     2
## 31        27.83           2              1                     3
## 32        19.88           1              2                     4
## 33        26.87           2              2                     3
## 34        33.93           2              1                     4
## 35        25.86           2              2                     4
## 36        19.88           1              1                     4
## 37        18.91           1              1                     2
## 38        19.88           1              1                     1
## 39        31.92           2              1                     4
## 4         28.76           2              2                     2
## 40        22.89           2              1                     4
## 41        31.96           2              2                     3
## 42        19.92           1              1                     4
## 43        18.91           1              1                     4
## 44        18.91           1              1                     2
## 45        19.98           1              1                     4
## 46        18.91           1              1                     2
## 47        20.98           1              1                     4
## 48        19.11           1              1                     4
## 49        20.11           1              1                     4
## 5         25.75           2              2                     1
## 50        19.98           1              1                     4
## 51        19.98           1              2                     4
## 52        19.07           1              2                    NA
## 53        19.07           1              1                    NA
## 54        20.11           1              1                    NA
## 55        20.11           1              1                    NA
## 56        20.15           1              1                    NA
## 57        21.15           1              1                    NA
## 58        20.15           1              1                    NA
## 59        20.15           1              1                    NA
## 6         28.87           2              1                     1
## 60        20.15           1              2                    NA
## 61        32.38           2              1                    NA
## 62        25.99           2              1                    NA
## 63        23.98           2              1                    NA
## 64        25.99           2              2                    NA
## 65        29.02           2              2                    NA
## 66        43.07           2              1                    NA
## 67        47.24           2              1                    NA
## 68        47.24           2              2                    NA
## 69        29.17           2              1                    NA
## 7         35.97           2              1                     3
## 70        21.15           1              2                    NA
## 71        21.15           1              1                    NA
## 72        28.25           2              1                    NA
## 73        24.23           2              1                    NA
## 74        24.23           2              1                    NA
## 75        21.22           1              1                    NA
## 76        21.22           1              1                    NA
## 77        31.32           2              2                    NA
## 78        26.30           2              1                    NA
## 79        32.32           2              2                    NA
## 8         34.27           2              1                     3
## 80        27.30           2              1                    NA
## 81        26.30           2              2                    NA
## 82        21.28           1              1                    NA
## 83        20.34           1              1                    NA
## 84        22.34           1              1                    NA
## 85        19.39           1              1                    NA
## 86        21.40           1              1                    NA
## 87        19.39           1              2                    NA
## 88        19.39           1              2                    NA
## 89        22.49           1              1                    NA
## 9         20.63           1              2                     1
## 90        20.49           1              1                    NA
## 91        21.49           1              2                    NA
## 92        19.54           1              1                    NA
## 93        23.56           1              1                    NA
## 94        20.55           1              2                    NA
## 95        21.55           1              1                    NA
## 96        19.54           1              2                    NA
##    event_start_covid exposure_material_type_influenza
## 1                  1                                3
## 10                 3                                1
## 11                 3                                1
## 12                 1                                3
## 13                 3                                1
## 14                 3                                1
## 15                 3                                1
## 16                 1                                3
## 17                 3                                1
## 18                 3                                1
## 19                 3                                1
## 2                  3                                1
## 20                 3                                1
## 21                 3                                1
## 22                 1                                3
## 23                 1                                3
## 24                 3                                1
## 25                 3                                1
## 26                 1                                3
## 27                 3                                1
## 28                 3                                1
## 29                 3                                1
## 3                  1                                1
## 30                 3                                1
## 31                 3                                1
## 32                 1                                3
## 33                 3                                1
## 34                 1                                3
## 35                 1                                3
## 36                 2                                3
## 37                 2                                2
## 38                 1                                1
## 39                 1                                3
## 4                  3                                1
## 40                 1                                3
## 41                 1                                1
## 42                 1                                3
## 43                 1                                3
## 44                 1                                1
## 45                 1                                3
## 46                 1                                1
## 47                 1                                3
## 48                 2                                3
## 49                 1                                3
## 5                  3                                1
## 50                 1                                3
## 51                 1                                3
## 52                NA                               NA
## 53                NA                               NA
## 54                NA                               NA
## 55                NA                               NA
## 56                NA                               NA
## 57                NA                               NA
## 58                NA                               NA
## 59                NA                               NA
## 6                  3                                1
## 60                NA                               NA
## 61                NA                               NA
## 62                NA                               NA
## 63                NA                               NA
## 64                NA                               NA
## 65                NA                               NA
## 66                NA                               NA
## 67                NA                               NA
## 68                NA                               NA
## 69                NA                               NA
## 7                  3                                1
## 70                NA                               NA
## 71                NA                               NA
## 72                NA                               NA
## 73                NA                               NA
## 74                NA                               NA
## 75                NA                               NA
## 76                NA                               NA
## 77                NA                               NA
## 78                NA                               NA
## 79                NA                               NA
## 8                  3                                1
## 80                NA                               NA
## 81                NA                               NA
## 82                NA                               NA
## 83                NA                               NA
## 84                NA                               NA
## 85                NA                               NA
## 86                NA                               NA
## 87                NA                               NA
## 88                NA                               NA
## 89                NA                               NA
## 9                  3                                1
## 90                NA                               NA
## 91                NA                               NA
## 92                NA                               NA
## 93                NA                               NA
## 94                NA                               NA
## 95                NA                               NA
## 96                NA                               NA
##    exposure_material_type_covid IgG.PT.day14 CCL3.day3 Monocytes.day1
## 1                             1    7.6475855  8.531381             NA
## 10                            2    2.2695086  7.491853             NA
## 11                            2    8.6987531  7.179909       7.257095
## 12                            1    6.6128578        NA             NA
## 13                            2    5.8841566  9.832890             NA
## 14                            2    8.0768224        NA             NA
## 15                            2    6.6651860  8.118941      10.585489
## 16                            1    6.4541001        NA             NA
## 17                            2    7.4085495  8.209453      16.401488
## 18                            2    7.5228254  6.643856             NA
## 19                            2    7.4194018  7.066089             NA
## 2                             2           NA        NA             NA
## 20                            2    7.0689888  8.909893      26.605583
## 21                            2    5.1846314  7.900867      34.812168
## 22                            1    6.5076306  6.375039             NA
## 23                            1    7.0576625  7.066089             NA
## 24                            2    4.6224481  7.238405             NA
## 25                            2    4.5909272  7.562242             NA
## 26                            1    5.5468623  7.219169      16.108508
## 27                            2    5.4905601  7.592457             NA
## 28                            2    6.8762502        NA             NA
## 29                            2    6.2496495  8.154818      25.083209
## 3                             1    7.0245630  7.888743             NA
## 30                            2    5.9932153        NA             NA
## 31                            2    5.0875933  8.897845       8.545243
## 32                            1    8.0965425  9.353147             NA
## 33                            2    3.8450834  9.550747      17.703064
## 34                            1    7.4372481        NA             NA
## 35                            1    6.2916862  7.774787             NA
## 36                            1    5.4060017  7.584963      17.446750
## 37                            1           NA        NA             NA
## 38                            1    6.9326678  5.977280             NA
## 39                            1    6.1874734        NA             NA
## 4                             2    7.1886911  7.066089       7.211965
## 40                            1    7.1901774        NA             NA
## 41                            1    7.6409027        NA             NA
## 42                            1    7.3543048  8.980140             NA
## 43                            1    4.7818220  8.317413             NA
## 44                            1    8.2039885  7.209453      35.241054
## 45                            1    5.3037468        NA      13.545087
## 46                            1    7.6599183        NA      30.018793
## 47                            1    8.1390023 13.173521       8.663056
## 48                            1    0.6191782 11.965063      18.252658
## 49                            1    7.1395468        NA      10.347126
## 5                             2    6.6256103  7.554589             NA
## 50                            1    5.7517331 13.075145             NA
## 51                            1    6.2205920        NA             NA
## 52                           NA    6.7943897 13.313733      23.512649
## 53                           NA    7.7831903 12.686938             NA
## 54                           NA    7.4376533        NA             NA
## 55                           NA    6.9266194        NA      15.334381
## 56                           NA    7.6690347        NA             NA
## 57                           NA    6.3923743        NA             NA
## 58                           NA    7.8093466        NA             NA
## 59                           NA    8.4674371        NA             NA
## 6                             2    7.3965736  7.761551      41.380502
## 60                           NA    4.4714296        NA             NA
## 61                           NA    8.2526654  8.717676             NA
## 62                           NA   10.5975417 10.582142             NA
## 63                           NA   10.9051623  9.266787      18.700000
## 64                           NA    9.1278687  9.333155      13.800000
## 65                           NA   10.8445096 11.200899      20.400000
## 66                           NA   11.0087787  9.951285      13.900000
## 67                           NA   11.0562652  8.826548      31.100000
## 68                           NA   10.6132806  8.344296      15.500000
## 69                           NA   10.1689229  9.631177      18.900000
## 7                             2    6.5497867        NA             NA
## 70                           NA    7.6596483  8.495855      46.100000
## 71                           NA    9.8422738  9.881114      18.200000
## 72                           NA   10.7625897  8.243174      27.500000
## 73                           NA   10.3994888  8.717676      23.400000
## 74                           NA    8.5950614  7.700440      50.000000
## 75                           NA    7.1006623  7.924813             NA
## 76                           NA   10.2854022  7.686501      22.300000
## 77                           NA    8.9872640 10.913637      31.000000
## 78                           NA    9.7013065  8.290019      35.700000
## 79                           NA    9.9947070 11.197831      52.600000
## 8                             2           NA        NA             NA
## 80                           NA    7.9628960 11.810973      26.400000
## 81                           NA    9.0161118 11.244958      35.600000
## 82                           NA           NA 12.988152      20.500000
## 83                           NA    8.7729745  7.033423      27.700000
## 84                           NA    8.8946701  9.219169      25.100000
## 85                           NA    7.4072678  8.479780      18.700000
## 86                           NA    9.2672550  9.159871      25.200000
## 87                           NA           NA 10.062046      22.500000
## 88                           NA           NA 12.066426      20.200000
## 89                           NA    7.0762538  9.377211      16.600000
## 9                             2    4.2023938  8.388017             NA
## 90                           NA    7.8005768  8.781360      21.600000
## 91                           NA    8.9320595  7.459432      40.400000
## 92                           NA    8.5651021  8.108524      33.000000
## 93                           NA    9.3359486  8.189825      15.000000
## 94                           NA    9.9581902  7.139551      39.500000
## 95                           NA   10.0764114  7.721099      23.500000
## 96                           NA    8.5961898  7.475733      35.000000

###Distination plot

distPlot <- function(col, df){
  p <- 
    ggplot(df) +
    aes_string(col)

  if(is.numeric(df[[col]])) {
    p <- p + geom_density()

  } else {
    p <- p + geom_bar()
  }
}


names(dataDf)[2:length(dataDf)]
##  [1] "CCL3"                             "IgG.FHA"                         
##  [3] "IgG.PRN"                          "IgG.PT"                          
##  [5] "Monocytes"                        "age_at_boost"                    
##  [7] "infancy_vac"                      "biological_sex"                  
##  [9] "event_start_influenza"            "event_start_covid"               
## [11] "exposure_material_type_influenza" "exposure_material_type_covid"    
## [13] "IgG.PT.day14"                     "CCL3.day3"                       
## [15] "Monocytes.day1"
distPlots <- lapply(names(dataDf)[2:length(dataDf)], distPlot, df=dataDf)
plot_grid(plotlist = distPlots)
## Warning: Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).
## Warning: Removed 45 rows containing non-finite values (`stat_density()`).
## Removed 45 rows containing non-finite values (`stat_density()`).
## Removed 45 rows containing non-finite values (`stat_density()`).
## Removed 45 rows containing non-finite values (`stat_density()`).
## Warning: Removed 6 rows containing non-finite values (`stat_density()`).
## Warning: Removed 24 rows containing non-finite values (`stat_density()`).
## Warning: Removed 43 rows containing non-finite values (`stat_density()`).

Test model quality

options(warn=-1)
xCols = c("CCL3", "IgG.FHA", "IgG.PRN", "IgG.PT", "Monocytes", "age_at_boost", "infancy_vac", "biological_sex", "event_start_influenza", "event_start_covid", "exposure_material_type_influenza", "exposure_material_type_covid")

# xCols = c("CCL3", "IgG.FHA", "IgG.PRN", "IgG.PT", "Monocytes", "age_at_boost", "infancy_vac", "biological_sex")
yCols = c("IgG.PT.day14", "CCL3.day3", "Monocytes.day1")

pred_cor <- data.frame(matrix(nrow=length(yCols), ncol=3))
rownames(pred_cor) <- yCols
colnames(pred_cor) <- c('pearson.cor.pred.true', 'spearman.cor.pred.true', 'ranked.spearman.cor.pred.true')

rownames(dataDf) <- attr(dataDf, "row.names")
print(typeof(row.names(dataDf)))
## [1] "character"
# 
for (i in 1:length(yCols)){
  all_preds <- c()
  all_true <- c()
  all_reduced <- c()
  set.seed(1)
  
  filteredY <- na.omit(dataDf[yCols[i]])
  filteredX <- na.omit(dataDf[xCols])
  
  row_int <- intersect(rownames(filteredY), rownames(filteredX))
  
  for (j in 1:length(row_int)){
    train <- row_int[-c(j)]
    xData <- filteredX[train, xCols]
    yData <- filteredY[train,]
    
    a1= nrow(xData[train,])
    a2= nrow(xData[train,]-1)
    
    allidx = row_int
    predidx = setdiff(allidx, train)
    
    # create lasso model
    cvfit_out <- cv.glmnet(x=as.matrix(xData), yData, family='gaussian',
                         alpha=1, nfolds=nrow(xData[train,]))

    preds <- predict(cvfit_out, newx = as.matrix(data.frame(filteredX[predidx,])), s='lambda.min')

    all_preds <- c(all_preds, preds)
    all_true<- c(all_true, filteredY[predidx, yCols[i]])
    all_reduced <- c(all_reduced, (filteredY[predidx, yCols[i]]-preds) )
  }
  
  b = data_frame(all_preds, rank(all_preds,na.last="keep",ties.method="min"), all_true, rank(all_true,na.last="keep",ties.method="min"))

  pred_cor[yCols[i],'pearson.cor.pred.true'] <- cor(all_preds,all_true)
  pred_cor[yCols[i],'spearman.cor.pred.true'] <- cor(all_preds,all_true, method="spearman")
  pred_cor[yCols[i],'ranked.spearman.cor.pred.true'] <- cor(rank(all_preds,na.last="keep",ties.method="min"),rank(all_true,na.last="keep",ties.method="min"), method="spearman")
  pred_cor[yCols[i],'MSE'] <- mean((all_reduced)^2)
}

pred_cor
##                pearson.cor.pred.true spearman.cor.pred.true
## IgG.PT.day14               0.3253222              0.4221543
## CCL3.day3                  0.7341841              0.4118906
## Monocytes.day1             0.3505482              0.4178571
##                ranked.spearman.cor.pred.true       MSE
## IgG.PT.day14                       0.4221543   2.64600
## CCL3.day3                          0.4118906   1.23576
## Monocytes.day1                     0.4178571 107.04174
typeof(pred_cor)
## [1] "list"

For each model, can assess which features contribute to non-zero coefficients

Consider only choosing models for follow-on analysis that show good correlation scores

size <- length(yCols)
all_models_coef<-vector(mode='list',length=size)
all_models_names<-vector(mode='list',length=size)
all_models<-vector(mode='list',length=size)

for (i in 1:length(yCols)){
  set.seed(1)
  filteredY <- na.omit(dataDf[yCols[i]])
  filteredX <- na.omit(dataDf[xCols])
  
  row_int <- intersect(rownames(filteredY), rownames(filteredX))
  
  # create lasso model
  suppressWarnings(cvfit_out <- cv.glmnet(x=as.matrix(filteredX[row_int,]), as.matrix(filteredY[row_int,]), family='gaussian', alpha=1, nfolds=nrow(filteredX[row_int,])))
  plot(cvfit_out)
  all_models_coef[i]=list(coef(cvfit_out, s = 'lambda.min')[coef(cvfit_out, s = 'lambda.min')[,1]!= 0])
  all_models_names[i]=list(rownames(coef(cvfit_out, s = 'lambda.min'))[coef(cvfit_out, s = 'lambda.min')[,1]!= 0])
}
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

names(all_models_coef) <- yCols
names(all_models_names) <- yCols
for (i in 1:size){
  all_models[[i]] = data.frame(cbind(all_models_names[[i]],all_models_coef[[i]]))
  colnames(all_models[[i]])<-c("Variable","Coefficient")
  all_models[[i]]$Coefficient<-as.numeric(all_models[[i]]$Coefficient)
  all_models[[i]]$Coefficient=round(all_models[[i]]$Coefficient,3)
  all_models[[i]]<-all_models[[i]] %>% arrange(desc(abs(Coefficient)))
}
names(all_models)<-yCols

all_models
## $IgG.PT.day14
##                Variable Coefficient
## 1           (Intercept)       6.062
## 2                IgG.PT       0.806
## 3     event_start_covid      -0.299
## 4 event_start_influenza      -0.148
## 5               IgG.PRN       0.124
## 
## $CCL3.day3
##            Variable Coefficient
## 1       (Intercept)       4.263
## 2              CCL3       1.092
## 3       infancy_vac      -0.764
## 4 event_start_covid       0.152
## 5           IgG.PRN      -0.108
## 
## $Monocytes.day1
##      Variable Coefficient
## 1 (Intercept)       6.227
## 2   Monocytes       0.992
setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/models")
a= append(list(pred_cor),all_models)
names(a)[1] <- 'pred_cor'
sink("expanded_allModels_predCor_baseExposure_removedDiplicates_normalized.txt")
# sink("expanded_allModels_predCor_notNormalized.txt")
print(a)
## $pred_cor
##                pearson.cor.pred.true spearman.cor.pred.true
## IgG.PT.day14               0.3253222              0.4221543
## CCL3.day3                  0.7341841              0.4118906
## Monocytes.day1             0.3505482              0.4178571
##                ranked.spearman.cor.pred.true       MSE
## IgG.PT.day14                       0.4221543   2.64600
## CCL3.day3                          0.4118906   1.23576
## Monocytes.day1                     0.4178571 107.04174
## 
## $IgG.PT.day14
##                Variable Coefficient
## 1           (Intercept)       6.062
## 2                IgG.PT       0.806
## 3     event_start_covid      -0.299
## 4 event_start_influenza      -0.148
## 5               IgG.PRN       0.124
## 
## $CCL3.day3
##            Variable Coefficient
## 1       (Intercept)       4.263
## 2              CCL3       1.092
## 3       infancy_vac      -0.764
## 4 event_start_covid       0.152
## 5           IgG.PRN      -0.108
## 
## $Monocytes.day1
##      Variable Coefficient
## 1 (Intercept)       6.227
## 2   Monocytes       0.992
sink()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.