baseModel

library(omicade4)

## Loading required package: ade4

library(mogsa)
library(RSpectra)
# library(lubridate)
library(glmnet)

## Loading required package: Matrix

## Loaded glmnet 4.1-7

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:mogsa':
## 
##     combine

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(cowplot)
library(ggplot2)

# Directory that holds the harmonized clinical metadata. Paths are built with
# file.path() rather than setwd(), so reading the metadata does not change the
# working directory as a side effect.
harmonized_dir <- "/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/main/cmi_pb_datasets/processed/harmonized"

# Read in metadata. The first column of each file supplies the row names.
# stringsAsFactors = TRUE is kept on purpose: later code turns factor columns
# into their integer codes with as.numeric().
meta.2020 <- read.table(
  file.path(harmonized_dir, "clinical_metadata.2020.tsv"),
  sep = "\t", header = TRUE, stringsAsFactors = TRUE, row.names = 1
)
meta.2021 <- read.table(
  file.path(harmonized_dir, "clinical_metadata.2021.tsv"),
  sep = "\t", header = TRUE, stringsAsFactors = TRUE, row.names = 1
)

The files in the imputed data directory are already normalised, so we do not need to normalise them again.

# imputed_dir is path to local drive where data is saved
setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/main/cmi_pb_datasets/processed/imputed")

# Load one imputed baseline matrix from the working directory; the first CSV
# column supplies the row names.
read_imputed <- function(csv_name) {
  read.csv(csv_name, row.names = 1)
}

# Import imputed datasets: files suffixed _20
rnaseq_baseline_mat_imputed_20 <- read_imputed("rnaseq_baseline_mat_imputed_20_051022.csv")
cytof_baseline_mat_imputed_20 <- read_imputed("cytof_baseline_mat_imputed_20_051022.csv")
olink_baseline_mat_imputed_20 <- read_imputed("olink_baseline_mat_imputed_20_051022.csv")
abtiters_baseline_mat_imputed_20 <- read_imputed("abtiters_baseline_mat_imputed_20_051022.csv")

# Import imputed datasets: files suffixed _21
rnaseq_baseline_mat_imputed_21 <- read_imputed("rnaseq_baseline_mat_imputed_21_051022.csv")
cytof_baseline_mat_imputed_21 <- read_imputed("cytof_baseline_mat_imputed_21_051022.csv")
olink_baseline_mat_imputed_21 <- read_imputed("olink_baseline_mat_imputed_21_051022.csv")
abtiters_baseline_mat_imputed_21 <- read_imputed("abtiters_baseline_mat_imputed_21_051022.csv")

# Gene IDs of interest. Selecting them from the _20 RNA-seq matrix and printing
# the resulting column names fails loudly if any of them is missing.
tasks_seq <- c(
  "ENSG00000277632", "ENSG00000136244",
  "ENSG00000100906", "ENSG00000229807"
)
colnames(rnaseq_baseline_mat_imputed_20[tasks_seq])

## [1] "ENSG00000277632" "ENSG00000136244" "ENSG00000100906" "ENSG00000229807"

# Build a distribution plot for one column of a data frame.
#
# col: name of the column to plot, as a string.
# df:  data frame that contains the column.
#
# Returns a ggplot object: a density curve when the column is numeric,
# otherwise a bar chart.
distPlot <- function(col, df) {
  # aes_string() is deprecated; the .data pronoun is the supported way to map
  # a column whose name is held in a character variable.
  p <- ggplot(df) + aes(.data[[col]])

  if (is.numeric(df[[col]])) {
    p <- p + geom_density()
  } else {
    p <- p + geom_bar()
  }

  # Return the plot explicitly instead of relying on the invisible value of
  # the last assignment inside the if/else.
  p
}

# One distribution plot per gene in tasks_seq, drawn from the _20 RNA-seq matrix.
distPlots <- lapply(
  tasks_seq,
  function(gene_id) distPlot(gene_id, df = rnaseq_baseline_mat_imputed_20)
)

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Arrange the plots collected in distPlots into a single grid.
plot_grid(plotlist = distPlots)

# Get age at boost
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:cowplot':
## 
##     stamp

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Parse the boost and birth dates of a metadata table and append age_at_boost.
#
# meta_df: metadata data frame with date_of_boost and year_of_birth columns
#          holding year-month-day dates.
#
# Returns meta_df with both date columns parsed and a numeric age_at_boost
# column (elapsed weeks / 52, rounded to two decimals).
# NOTE(review): 52 weeks is 364 days, so this slightly overstates age relative
# to calendar years. The formula is kept unchanged to preserve existing
# values; confirm whether that approximation is intended.
add_age_at_boost <- function(meta_df) {
  meta_df$date_of_boost <- parse_date_time(meta_df$date_of_boost, "ymd")
  meta_df$year_of_birth <- parse_date_time(meta_df$year_of_birth, "ymd")
  elapsed_weeks <- difftime(meta_df$date_of_boost, meta_df$year_of_birth,
                            units = "weeks")
  meta_df$age_at_boost <- as.numeric(round(elapsed_weeks / 52, 2))
  meta_df
}

# Same computation for both cohorts, previously written out twice.
meta.2020 <- add_age_at_boost(meta.2020)
meta.2021 <- add_age_at_boost(meta.2021)

# Stack the covariates shared by both metadata tables into one data frame.
covariate_cols <- c("age_at_boost", "infancy_vac", "biological_sex")
meta <- rbind(meta.2020[covariate_cols], meta.2021[covariate_cols])

# Convert the two categorical covariates to numeric codes.
for (factor_col in c("infancy_vac", "biological_sex")) {
  meta[[factor_col]] <- as.numeric(meta[[factor_col]])
}
colnames(meta)

## [1] "age_at_boost"   "infancy_vac"    "biological_sex"

Add the exposure table.

library(DBI)
library(RPostgreSQL)
library(tidyr)

## 
## Attaching package: 'tidyr'

## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack

library(dplyr)
library(readr)

# Connection settings for the PostgreSQL database. Each non-secret value can
# be overridden through an environment variable and otherwise falls back to
# the value previously hard-coded here.
dsn_database <- Sys.getenv("CMIPB_DB_NAME", unset = "cmipb_v4_0")
dsn_hostname <- Sys.getenv("CMIPB_DB_HOST", unset = "cmi-pb.lji.org")
dsn_port <- Sys.getenv("CMIPB_DB_PORT", unset = "5432")
dsn_uid <- Sys.getenv("CMIPB_DB_USER", unset = "cmipb")

# The password is read from the environment only, so that no credential is
# stored in the source file (e.g. set CMIPB_DB_PASSWORD in ~/.Renviron).
dsn_pwd <- Sys.getenv("CMIPB_DB_PASSWORD")
if (!nzchar(dsn_pwd)) {
  stop("Set the CMIPB_DB_PASSWORD environment variable before connecting.",
       call. = FALSE)
}

drv <- dbDriver("PostgreSQL")
print("Connecting to Database…")
# Fail immediately with the underlying reason when the connection cannot be
# opened. Previously the error was only printed and the script carried on
# with `connec` undefined, which surfaced later as an unrelated error.
connec <- tryCatch(
  dbConnect(drv,
            dbname = dsn_database,
            host = dsn_hostname,
            port = dsn_port,
            user = dsn_uid,
            password = dsn_pwd),
  error = function(cond) {
    stop("Unable to connect to Database: ", conditionMessage(cond),
         call. = FALSE)
  }
)
print("Database Connected!")

## [1] "Connecting to Database…"
## [1] "Database Connected!"

# dbListTables(connec)

library(tibble)
library(tidyverse)

## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ purrr   1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine()   masks mogsa::combine()
## ✖ tidyr::expand()    masks Matrix::expand()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ tidyr::pack()      masks Matrix::pack()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack()    masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# library(lubridate)
# Fetch every row and column of the immune_exposure table.
exposure_sql <- "SELECT * FROM immune_exposure"
exposDf <- dbGetQuery(connec, exposure_sql)

# Print, row by row, whether event_start and event_end hold the same value.
same_start_end <- exposDf$event_start == exposDf$event_end
ifelse(same_start_end, "Yes", "No")

##   [1] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [13] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [25] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [37] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [49] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [61] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [73] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [85] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
##  [97] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [109] "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes" "Yes"
## [121] "Yes"

# Number of exposure records per event type.
count(exposDf, event_type)

##                 event_type  n
## 1               bronchitis  1
## 2                    covid 36
## 3                     dtap  1
## 4                      hib  2
## 5                      hpv 11
## 6                influenza 32
## 7    japanese encephalitis  1
## 8            meningococcal  8
## 9            mononucleosis  1
## 10            pneumococcal  1
## 11               pneumonia  3
## 12                   polio 10
## 13            strep throat  4
## 14 tick-borne encephalitis  1
## 15                 typhoid  1
## 16               varicella  7
## 17          whooping cough  1

# Replace a column's values by their factor level index, stored as text.
# Presumably kept as text so that pivot_wider() below can fill gaps with the
# string "Not known".
as_code_string <- function(values) {
  as.character(as.numeric(as.factor(values)))
}
exposDf$exposure_material_type <- as_code_string(exposDf$exposure_material_type)
exposDf$event_start <- as_code_string(exposDf$event_start)

# Only covid and influenza events are carried forward.
exposDf <- exposDf %>%
  dplyr::filter(event_type %in% c("covid", "influenza"))

# Spread to one row per subject with one column per (value, event type) pair.
# When a subject has several records for the same event type, max() picks one.
# NOTE(review): max() on character codes compares lexicographically
# ("9" > "10"); confirm the codes never reach two digits or that this is
# acceptable.
exposure_long <- exposDf[, c("subject_id", "event_type",
                             "exposure_material_type", "event_start")]
exposDf1 <- pivot_wider(
  exposure_long,
  names_from = event_type,
  values_from = c(event_start, exposure_material_type),
  values_fn = max,
  values_fill = "Not known"
)

colnames(exposDf1)

## [1] "subject_id"                       "event_start_influenza"           
## [3] "event_start_covid"                "exposure_material_type_influenza"
## [5] "exposure_material_type_covid"

# typeof() reports the underlying storage type, not the class, of exposDf1.
typeof(exposDf1)

## [1] "list"

# Work on a plain data frame and drop every row in which any exposure column
# contains a comma.
# The filter is applied cumulatively to exposDf2. The earlier version
# re-filtered exposDf1 on every pass, so only the last column's filter
# survived and the as.data.frame() conversion was thrown away.
exposDf2 <- as.data.frame(exposDf1)
value_cols <- setdiff(colnames(exposDf2), "subject_id")
for (col in value_cols) {
  exposDf2 <- exposDf2[!grepl(",", exposDf2[[col]]), , drop = FALSE]
}

# Encode the exposure columns as numeric factor codes. subject_id is left
# untouched: sending it through as.factor()/as.numeric() would replace the
# real IDs by their level ranks, which only matches the IDs when they happen
# to be exactly 1..n, and would silently misalign later merges on subject_id.
exposDf2 <- exposDf2 %>%
  mutate(across(all_of(value_cols), function(x) as.numeric(as.factor(x))))
exposDf2

## # A tibble: 56 × 5
##    subject_id event_start_influenza event_start_covid exposure_material_type_i…¹
##         <dbl>                 <dbl>             <dbl>                      <dbl>
##  1          3                     1                 1                          1
##  2         40                     1                 1                          1
##  3         43                     3                 1                          1
##  4         35                     1                 1                          1
##  5         49                     2                 1                          1
##  6         47                     2                 1                          1
##  7          2                     1                 3                          1
##  8          4                     2                 3                          1
##  9          5                     1                 3                          1
## 10          6                     1                 3                          1
## # ℹ 46 more rows
## # ℹ abbreviated name: ¹exposure_material_type_influenza
## # ℹ 1 more variable: exposure_material_type_covid <dbl>

# nullToNA <- function(x) {
#     x[sapply(x, is.null)] <- 'Not known'
#     return(x)
# }
# z=exposDf3[2][1]
# exposDf3 <- lapply(exposDf2, nullToNA)
# exposDf3.df <- t(do.call(rbind,lapply(exposDf3, rbind)))
# 
# # z= dim(exposDf3.df)[1]
# # print(dim(exposDf3.df))
# # print(exposDf3.df[[2]][2])
# # exposDf4 <- as.data.frame(matrix(unlist(exposDf3.df),nrow=dim(exposDf3.df)[1],byrow=TRUE)
# exposDf4 <- as.data.frame(matrix(unlist(exposDf3.df),nrow=dim(exposDf3.df)[1],byrow=FALSE))
# # z= typeof(exposDf4)
# colnames(exposDf4) <- names(exposDf3)
# #
# # z=exposDf3.df$subject_id
# #

All abtiter data have been log2-transformed. Why? To make them approximately normally distributed, since our GLM uses a Gaussian error distribution?

# Get the y data
y_path <- "/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/results/yData_task_matrix.common_names.mfi_raw.tsv"
dataY <- read.table(y_path, sep = "\t", header = TRUE,
                    stringsAsFactors = TRUE, row.names = 1)

# Keep the three target columns and rename the gene-ID column to CCL3.day3.
dataY <- dataY[c("IgG.PT.day14", "ENSG00000277632.day3", "Monocytes.day1")]
names(dataY)[names(dataY) == "ENSG00000277632.day3"] <- "CCL3.day3"

# log2(x + 1) transform for IgG.PT.day14 and CCL3.day3; Monocytes.day1 is
# left as read.
for (target in c("IgG.PT.day14", "CCL3.day3")) {
  dataY[[target]] <- log2(dataY[[target]] + 1)
}

# Copy the row names into a subject_id column.
dataY$subject_id <- rownames(dataY)
colnames(dataY)

## [1] "IgG.PT.day14"   "CCL3.day3"      "Monocytes.day1" "subject_id"

# Build a distribution plot for one column of a data frame.
# NOTE(review): this repeats the distPlot definition given earlier in the
# file; one of the two could be removed.
#
# col: name of the column to plot, as a string.
# df:  data frame that contains the column.
#
# Returns a ggplot object: a density curve when the column is numeric,
# otherwise a bar chart.
distPlot <- function(col, df) {
  # aes_string() is deprecated; the .data pronoun is the supported way to map
  # a column whose name is held in a character variable.
  p <- ggplot(df) + aes(.data[[col]])

  if (is.numeric(df[[col]])) {
    p <- p + geom_density()
  } else {
    p <- p + geom_bar()
  }

  # Return the plot explicitly instead of relying on the invisible value of
  # the last assignment inside the if/else.
  p
}

# typeof() reports the underlying storage type, not the class, of dataY.
typeof(dataY)

## [1] "list"

# 
# Distribution of each target column of dataY, collected into one grid.
target_cols <- c("IgG.PT.day14", "CCL3.day3", "Monocytes.day1")
distPlots <- lapply(target_cols, function(target) distPlot(target, df = dataY))
plot_grid(plotlist = distPlots)

## Warning: Removed 4 rows containing non-finite values (`stat_density()`).

## Warning: Removed 22 rows containing non-finite values (`stat_density()`).

## Warning: Removed 41 rows containing non-finite values (`stat_density()`).

# Rename the ENSG00000277632 column to CCL3 in both RNA-seq matrices.
colnames(rnaseq_baseline_mat_imputed_20)[colnames(rnaseq_baseline_mat_imputed_20) == "ENSG00000277632"] <- "CCL3"
colnames(rnaseq_baseline_mat_imputed_21)[colnames(rnaseq_baseline_mat_imputed_21) == "ENSG00000277632"] <- "CCL3"

# Stack the _20 and _21 tables for each baseline feature.
rnaDf <- rbind(rnaseq_baseline_mat_imputed_20["CCL3"],
               rnaseq_baseline_mat_imputed_21["CCL3"])

abtiterDf <- rbind(abtiters_baseline_mat_imputed_20["IgG.PT"],
                   abtiters_baseline_mat_imputed_21["IgG.PT"])

cytofDf <- rbind(cytof_baseline_mat_imputed_20["Monocytes"],
                 cytof_baseline_mat_imputed_21["Monocytes"])

# Full outer joins throughout (all = TRUE), so a row is kept even when it is
# missing from one of the tables; the gaps become NA. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
# Merging by "row.names" puts the key in the first column, which is renamed
# to subject_id.
dataDf1 <- merge(rnaDf, abtiterDf, by = "row.names", all = TRUE)
colnames(dataDf1)[1] <- "subject_id"

dataDf2 <- merge(cytofDf, meta, by = "row.names", all = TRUE)
colnames(dataDf2)[1] <- "subject_id"

dataX1 <- merge(dataDf1, dataDf2, by = "subject_id", all = TRUE)

# Add the exposure columns, then the y (target) columns.
dataX <- merge(dataX1, exposDf2, by = "subject_id", all = TRUE)
dataDf <- merge(dataX, dataY, by = "subject_id", all = TRUE)
rownames(dataDf) <- dataDf$subject_id

dataDf

##    subject_id      CCL3     IgG.PT  Monocytes age_at_boost infancy_vac
## 1           1  5.333531 2.24397113  7.5260062        30.80           2
## 10         10  4.101398 0.04154169 13.4642547        34.68           2
## 11         11  4.542939 1.77698271 10.7905171        30.76           2
## 12         12        NA         NA         NA        34.68           2
## 13         13  3.399718 1.36806068  5.0304685        19.63           1
## 14         14        NA         NA         NA        23.70           2
## 15         15  4.835874 0.38100554 15.6373679        27.71           2
## 16         16        NA         NA         NA        29.66           2
## 17         17  4.512669 3.45553366 14.4574172        36.82           2
## 18         18  4.083384 1.68734290  0.9568001        19.73           1
## 19         19  4.459300 2.16297720 13.5202756        22.81           2
## 2           2        NA         NA         NA        51.25           2
## 20         20  3.963474 2.55022450 21.3991621        35.78           2
## 21         21  3.823138 2.77448207 15.2266453        33.77           2
## 22         22  4.462314 2.16393744 11.9355890        31.77           2
## 23         23  4.421223 0.40194862 11.7041207        25.82           2
## 24         24  3.106516 0.26992483 11.5201600        24.79           2
## 25         25  4.042732 0.74967184 13.4709985        28.80           2
## 26         26  4.735685 0.72685734  5.5320667        33.85           2
## 27         27  3.782618 0.77080873 13.8965682        19.80           1
## 28         28        NA         NA         NA        34.85           2
## 29         29  4.447249 0.99688351 16.0947648        19.80           1
## 3           3  4.599972 1.06789053 13.0245725        33.89           2
## 30         30        NA         NA         NA        28.84           2
## 31         31  4.693487 0.16545091  4.8790233        27.83           2
## 32         32  5.779549 1.40368966 13.5492381        19.88           1
## 33         33  5.311794 0.63824158  7.6518328        26.87           2
## 34         34        NA         NA         NA        33.93           2
## 35         35  5.434762 1.19302502 13.5420630        25.86           2
## 36         36  3.016496 1.04080942 11.2437986        19.88           1
## 37         37        NA         NA         NA        18.91           1
## 38         38  3.017922 0.77149215 13.7721006        19.88           1
## 39         39        NA         NA         NA        31.92           2
## 4           4  4.094574 1.60723380  5.4908380        28.76           2
## 40         40        NA         NA         NA        22.89           2
## 41         41        NA         NA         NA        31.96           2
## 42         42  5.230280 1.11654738 10.6095832        19.92           1
## 43         43  3.871351 1.00310977 13.6337391        18.91           1
## 44         44  4.518409 0.92865661 23.1032762        18.91           1
## 45         45        NA         NA         NA        19.98           1
## 46         46        NA         NA         NA        18.91           1
## 47         47  6.900831 2.65845646 10.1292397        20.98           1
## 48         48  6.731536 0.04154169 14.9597507        19.11           1
## 49         49        NA         NA         NA        20.11           1
## 5           5  4.916572 2.26243525 13.1258248        25.75           2
## 50         50  8.091139 0.50769248  9.3613078        19.98           1
## 51         51        NA         NA         NA        19.98           1
## 52         52  7.288949 1.97201547 28.5490352        19.07           1
## 53         53  9.867189 4.17711376 12.8294284        19.07           1
## 54         54        NA         NA         NA        20.11           1
## 55         55        NA         NA         NA        20.11           1
## 56         56        NA         NA         NA        20.15           1
## 57         57        NA         NA         NA        21.15           1
## 58         58        NA         NA         NA        20.15           1
## 59         59        NA         NA         NA        20.15           1
## 6           6  4.575554 0.27896948 22.7930283        28.87           2
## 60         60        NA         NA         NA        20.15           1
## 61         61 12.042340 1.00000000 12.8005992        32.38           2
## 62         62  6.937333 1.07602222 13.5692650        25.99           2
## 63         63  6.691953 1.79570532 15.3000000        23.98           2
## 64         64  6.760926 2.04705167 12.9000000        25.99           2
## 65         65  7.605079 0.80140188 15.8000000        29.02           2
## 66         66  5.042425 1.32512343 15.8000000        43.07           2
## 67         67  5.889230 0.70618673 34.6000000        47.24           2
## 68         68  5.853946 1.05459788 13.5000000        47.24           2
## 69         69  5.528321 1.07297696 13.1000000        29.17           2
## 7           7        NA         NA         NA        35.97           2
## 70         70  5.815166 0.47027882 32.3000000        21.15           1
## 71         71  5.987707 2.90659418 12.2000000        21.15           1
## 72         72  5.880881 2.08840338 19.9000000        28.25           2
## 73         73  4.529196 1.88327381 15.7000000        24.23           2
## 74         74  3.871745 0.52016941 36.3000000        24.23           2
## 75         75  5.678804 0.17736981 14.4559889        21.22           1
## 76         76  4.663800 3.53643892 18.0000000        21.22           1
## 77         77  8.217396 0.69081102 28.8000000        31.32           2
## 78         78  7.151839 0.99519368 29.0000000        26.30           2
## 79         79  6.969300 2.32256773 41.5000000        32.32           2
## 8           8        NA         NA         NA        34.27           2
## 80         80  7.178107 0.46841860 21.9000000        27.30           2
## 81         81 10.726589 1.81601157 34.9000000        26.30           2
## 82         82  9.413611 0.76723104 22.5000000        21.28           1
## 83         83  8.588209 1.26356779 32.4000000        20.34           1
## 84         84  7.593122 0.28053529 18.9000000        22.34           1
## 85         85  4.937862 0.33276350 19.7000000        19.39           1
## 86         86  5.362224 0.30403242 23.5000000        21.40           1
## 87         87  5.295650 1.07997538 22.9000000        19.39           1
## 88         88  5.567850 1.04544297 17.6000000        19.39           1
## 89         89  5.130601 0.06807137 15.7000000        22.49           1
## 9           9  4.238481 0.28783862  5.7229578        20.63           1
## 90         90  4.837136 0.33985853 20.6000000        20.49           1
## 91         91  4.140370 0.92749667 25.4000000        21.49           1
## 92         92  4.623047 0.23829881 40.6000000        19.54           1
## 93         93  4.616769 1.57747907 16.3000000        23.56           1
## 94         94  4.785446 2.65824315 26.2000000        20.55           1
## 95         95  4.878235 2.29792327 17.3000000        21.55           1
## 96         96  4.356355 0.76421696 34.2000000        19.54           1
##    biological_sex event_start_influenza event_start_covid
## 1               1                     4                 1
## 10              1                     3                 3
## 11              1                     2                 3
## 12              2                     4                 1
## 13              2                     1                 3
## 14              2                     3                 3
## 15              2                     3                 3
## 16              1                     4                 1
## 17              1                     3                 3
## 18              1                     3                 3
## 19              2                     3                 3
## 2               1                     1                 3
## 20              1                     3                 3
## 21              2                     3                 3
## 22              1                     4                 1
## 23              1                     4                 1
## 24              1                     2                 3
## 25              1                     3                 3
## 26              1                     4                 1
## 27              1                     3                 3
## 28              2                     2                 3
## 29              2                     1                 3
## 3               1                     1                 1
## 30              1                     2                 3
## 31              1                     3                 3
## 32              2                     4                 1
## 33              2                     3                 3
## 34              1                     4                 1
## 35              2                     1                 1
## 36              1                     4                 1
## 37              1                     4                 1
## 38              1                     4                 2
## 39              1                     2                 2
## 4               2                     2                 3
## 40              1                     1                 1
## 41              2                     4                 1
## 42              1                     4                 1
## 43              1                     3                 1
## 44              1                     4                 1
## 45              1                     4                 1
## 46              1                     4                 1
## 47              1                     2                 1
## 48              1                     4                 1
## 49              1                     2                 1
## 5               2                     1                 3
## 50              1                     4                 1
## 51              2                     4                 2
## 52              2                     4                 1
## 53              1                     4                 1
## 54              1                     4                 1
## 55              1                     4                 1
## 56              1                     4                 1
## 57              1                    NA                NA
## 58              1                    NA                NA
## 59              1                    NA                NA
## 6               1                     1                 3
## 60              2                    NA                NA
## 61              1                    NA                NA
## 62              1                    NA                NA
## 63              1                    NA                NA
## 64              2                    NA                NA
## 65              2                    NA                NA
## 66              1                    NA                NA
## 67              1                    NA                NA
## 68              2                    NA                NA
## 69              1                    NA                NA
## 7               1                     3                 3
## 70              2                    NA                NA
## 71              1                    NA                NA
## 72              1                    NA                NA
## 73              1                    NA                NA
## 74              1                    NA                NA
## 75              1                    NA                NA
## 76              1                    NA                NA
## 77              2                    NA                NA
## 78              1                    NA                NA
## 79              2                    NA                NA
## 8               1                     3                 3
## 80              1                    NA                NA
## 81              2                    NA                NA
## 82              1                    NA                NA
## 83              1                    NA                NA
## 84              1                    NA                NA
## 85              1                    NA                NA
## 86              1                    NA                NA
## 87              2                    NA                NA
## 88              2                    NA                NA
## 89              1                    NA                NA
## 9               2                     1                 3
## 90              1                    NA                NA
## 91              2                    NA                NA
## 92              1                    NA                NA
## 93              1                    NA                NA
## 94              2                    NA                NA
## 95              1                    NA                NA
## 96              2                    NA                NA
##    exposure_material_type_influenza exposure_material_type_covid IgG.PT.day14
## 1                                 3                            1    7.6475855
## 10                                1                            2    2.2695086
## 11                                1                            2    8.6987531
## 12                                3                            1    6.6128578
## 13                                1                            2    5.8841566
## 14                                1                            2    8.0768224
## 15                                1                            2    6.6651860
## 16                                3                            1    6.4541001
## 17                                1                            2    7.4085495
## 18                                1                            2    7.5228254
## 19                                1                            2    7.4194018
## 2                                 1                            2           NA
## 20                                1                            2    7.0689888
## 21                                1                            2    5.1846314
## 22                                3                            1    6.5076306
## 23                                3                            1    7.0576625
## 24                                1                            2    4.6224481
## 25                                1                            2    4.5909272
## 26                                3                            1    5.5468623
## 27                                1                            2    5.4905601
## 28                                1                            2    6.8762502
## 29                                1                            2    6.2496495
## 3                                 1                            1    7.0245630
## 30                                1                            2    5.9932153
## 31                                1                            2    5.0875933
## 32                                3                            1    8.0965425
## 33                                1                            2    3.8450834
## 34                                3                            1    7.4372481
## 35                                1                            1    6.2916862
## 36                                3                            1    5.4060017
## 37                                3                            1           NA
## 38                                3                            1    6.9326678
## 39                                2                            1    6.1874734
## 4                                 1                            2    7.1886911
## 40                                1                            1    7.1901774
## 41                                3                            1    7.6409027
## 42                                3                            1    7.3543048
## 43                                1                            1    4.7818220
## 44                                3                            1    8.2039885
## 45                                3                            1    5.3037468
## 46                                3                            1    7.6599183
## 47                                1                            1    8.1390023
## 48                                3                            1    0.6191782
## 49                                1                            1    7.1395468
## 5                                 1                            2    6.6256103
## 50                                3                            1    5.7517331
## 51                                3                            1    6.2205920
## 52                                3                            1    6.7943897
## 53                                3                            1    7.7831903
## 54                                3                            1    7.4376533
## 55                                3                            1    6.9266194
## 56                                3                            1    7.6690347
## 57                               NA                           NA    6.3923743
## 58                               NA                           NA    7.8093466
## 59                               NA                           NA    8.4674371
## 6                                 1                            2    7.3965736
## 60                               NA                           NA    4.4714296
## 61                               NA                           NA    8.2526654
## 62                               NA                           NA   10.5975417
## 63                               NA                           NA   10.9051623
## 64                               NA                           NA    9.1278687
## 65                               NA                           NA   10.8445096
## 66                               NA                           NA   11.0087787
## 67                               NA                           NA   11.0562652
## 68                               NA                           NA   10.6132806
## 69                               NA                           NA   10.1689229
## 7                                 1                            2    6.5497867
## 70                               NA                           NA    7.6596483
## 71                               NA                           NA    9.8422738
## 72                               NA                           NA   10.7625897
## 73                               NA                           NA   10.3994888
## 74                               NA                           NA    8.5950614
## 75                               NA                           NA    7.1006623
## 76                               NA                           NA   10.2854022
## 77                               NA                           NA    8.9872640
## 78                               NA                           NA    9.7013065
## 79                               NA                           NA    9.9947070
## 8                                 1                            2           NA
## 80                               NA                           NA    7.9628960
## 81                               NA                           NA    9.0161118
## 82                               NA                           NA           NA
## 83                               NA                           NA    8.7729745
## 84                               NA                           NA    8.8946701
## 85                               NA                           NA    7.4072678
## 86                               NA                           NA    9.2672550
## 87                               NA                           NA           NA
## 88                               NA                           NA           NA
## 89                               NA                           NA    7.0762538
## 9                                 1                            2    4.2023938
## 90                               NA                           NA    7.8005768
## 91                               NA                           NA    8.9320595
## 92                               NA                           NA    8.5651021
## 93                               NA                           NA    9.3359486
## 94                               NA                           NA    9.9581902
## 95                               NA                           NA   10.0764114
## 96                               NA                           NA    8.5961898
##    CCL3.day3 Monocytes.day1
## 1   8.531381             NA
## 10  7.491853             NA
## 11  7.179909       7.257095
## 12        NA             NA
## 13  9.832890             NA
## 14        NA             NA
## 15  8.118941      10.585489
## 16        NA             NA
## 17  8.209453      16.401488
## 18  6.643856             NA
## 19  7.066089             NA
## 2         NA             NA
## 20  8.909893      26.605583
## 21  7.900867      34.812168
## 22  6.375039             NA
## 23  7.066089             NA
## 24  7.238405             NA
## 25  7.562242             NA
## 26  7.219169      16.108508
## 27  7.592457             NA
## 28        NA             NA
## 29  8.154818      25.083209
## 3   7.888743             NA
## 30        NA             NA
## 31  8.897845       8.545243
## 32  9.353147             NA
## 33  9.550747      17.703064
## 34        NA             NA
## 35  7.774787             NA
## 36  7.584963      17.446750
## 37        NA             NA
## 38  5.977280             NA
## 39        NA             NA
## 4   7.066089       7.211965
## 40        NA             NA
## 41        NA             NA
## 42  8.980140             NA
## 43  8.317413             NA
## 44  7.209453      35.241054
## 45        NA      13.545087
## 46        NA      30.018793
## 47 13.173521       8.663056
## 48 11.965063      18.252658
## 49        NA      10.347126
## 5   7.554589             NA
## 50 13.075145             NA
## 51        NA             NA
## 52 13.313733      23.512649
## 53 12.686938             NA
## 54        NA             NA
## 55        NA      15.334381
## 56        NA             NA
## 57        NA             NA
## 58        NA             NA
## 59        NA             NA
## 6   7.761551      41.380502
## 60        NA             NA
## 61  8.717676             NA
## 62 10.582142             NA
## 63  9.266787      18.700000
## 64  9.333155      13.800000
## 65 11.200899      20.400000
## 66  9.951285      13.900000
## 67  8.826548      31.100000
## 68  8.344296      15.500000
## 69  9.631177      18.900000
## 7         NA             NA
## 70  8.495855      46.100000
## 71  9.881114      18.200000
## 72  8.243174      27.500000
## 73  8.717676      23.400000
## 74  7.700440      50.000000
## 75  7.924813             NA
## 76  7.686501      22.300000
## 77 10.913637      31.000000
## 78  8.290019      35.700000
## 79 11.197831      52.600000
## 8         NA             NA
## 80 11.810973      26.400000
## 81 11.244958      35.600000
## 82 12.988152      20.500000
## 83  7.033423      27.700000
## 84  9.219169      25.100000
## 85  8.479780      18.700000
## 86  9.159871      25.200000
## 87 10.062046      22.500000
## 88 12.066426      20.200000
## 89  9.377211      16.600000
## 9   8.388017             NA
## 90  8.781360      21.600000
## 91  7.459432      40.400000
## 92  8.108524      33.000000
## 93  8.189825      15.000000
## 94  7.139551      39.500000
## 95  7.721099      23.500000
## 96  7.475733      35.000000

### Distribution plot

#' Build a distribution plot for one column of a data frame.
#'
#' Numeric columns are drawn as a density curve; any other column is drawn as
#' a bar chart.
#'
#' @param col Name of the column to plot, given as a single string.
#' @param df Data frame that contains `col`.
#' @return The ggplot object, returned visibly so that it can be collected by
#'   `lapply()` or printed directly at the console.
distPlot <- function(col, df) {
  # aes_string() is deprecated; the .data pronoun maps a column given by name.
  # The axis label is set explicitly so it stays the bare column name.
  p <-
    ggplot(df) +
    aes(x = .data[[col]]) +
    labs(x = col)

  if (is.numeric(df[[col]])) {
    p <- p + geom_density()
  } else {
    p <- p + geom_bar()
  }

  # Return the plot explicitly instead of relying on the invisible value of
  # the last assignment inside the if/else.
  p
}


# List every column name except the first. Negative indexing is used because
# 2:length(x) counts backwards (2, 1) when the frame has a single column.
names(dataDf)[-1]

##  [1] "CCL3"                             "IgG.PT"                          
##  [3] "Monocytes"                        "age_at_boost"                    
##  [5] "infancy_vac"                      "biological_sex"                  
##  [7] "event_start_influenza"            "event_start_covid"               
##  [9] "exposure_material_type_influenza" "exposure_material_type_covid"    
## [11] "IgG.PT.day14"                     "CCL3.day3"                       
## [13] "Monocytes.day1"

# Build one distribution plot per column except the first, then tile all of
# them in a single grid. Negative indexing avoids the backwards 2:1 sequence
# that 2:length(x) produces for a one-column frame.
distPlots <- lapply(names(dataDf)[-1], distPlot, df = dataDf)
plot_grid(plotlist = distPlots)

## Warning: Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).
## Removed 24 rows containing non-finite values (`stat_density()`).

## Warning: Removed 40 rows containing non-finite values (`stat_density()`).
## Removed 40 rows containing non-finite values (`stat_density()`).
## Removed 40 rows containing non-finite values (`stat_density()`).
## Removed 40 rows containing non-finite values (`stat_density()`).

## Warning: Removed 6 rows containing non-finite values (`stat_density()`).

## Warning: Removed 24 rows containing non-finite values (`stat_density()`).

## Warning: Removed 43 rows containing non-finite values (`stat_density()`).

Test model quality

# Ignore all warnings from here on (the setting is not restored anywhere in
# this section).
options(warn = -1)

# Predictor columns used by every model.
xCols <- c(
  "CCL3", "IgG.PT", "Monocytes", "age_at_boost", "infancy_vac",
  "biological_sex", "event_start_influenza", "event_start_covid",
  "exposure_material_type_influenza", "exposure_material_type_covid"
)

# Outcome columns; one model is evaluated per entry.
yCols <- c("IgG.PT.day14", "CCL3.day3", "Monocytes.day1")

# All-NA results table: one row per outcome, one column per correlation metric.
pred_cor <- data.frame(
  matrix(
    nrow = length(yCols),
    ncol = 3,
    dimnames = list(
      yCols,
      c("pearson.cor.pred.true", "spearman.cor.pred.true", "ranked.spearman.cor.pred.true")
    )
  )
)

# Re-assign the frame's stored "row.names" attribute as its row names, then
# report the storage type of the row names as they are returned to callers.
row.names(dataDf) <- attr(dataDf, "row.names")
print(typeof(rownames(dataDf)))

## [1] "character"

# 
# Leave-one-out evaluation of one lasso model per outcome in yCols.
# For every outcome, each complete row is held out in turn, a cross-validated
# lasso is fitted on the remaining rows, and the held-out row is predicted at
# lambda.min. The correlations between predictions and observed values are
# stored in the matching row of pred_cor.
for (i in seq_along(yCols)) {
  set.seed(1)

  # Keep only rows that are complete in the outcome and in all predictors.
  filteredY <- na.omit(dataDf[yCols[i]])
  filteredX <- na.omit(dataDf[xCols])
  row_int <- intersect(rownames(filteredY), rownames(filteredX))

  # cv.glmnet() needs at least 3 folds, i.e. at least 3 training rows once one
  # row is held out. Leave this outcome's pred_cor row as NA otherwise.
  if (length(row_int) < 4) {
    message("Skipping ", yCols[i], ": only ", length(row_int),
            " complete rows available")
    next
  }

  # Preallocate instead of growing the vectors on every iteration.
  all_preds <- numeric(length(row_int))
  all_true <- numeric(length(row_int))

  for (j in seq_along(row_int)) {
    train <- row_int[-j]
    predidx <- row_int[j]
    xData <- filteredX[train, xCols]
    yData <- filteredY[train, yCols[i]]

    # create lasso model
    # nfolds equals the number of training rows, so the inner cross-validation
    # is itself leave-one-out. cv.glmnet() forces grouped = FALSE (with a
    # warning) when folds hold fewer than 3 observations, so it is passed
    # explicitly here.
    cvfit_out <- cv.glmnet(x = as.matrix(xData), y = yData, family = "gaussian",
                           alpha = 1, nfolds = length(train), grouped = FALSE)

    preds <- predict(cvfit_out,
                     newx = as.matrix(filteredX[predidx, xCols, drop = FALSE]),
                     s = "lambda.min")

    all_preds[j] <- as.numeric(preds)
    all_true[j] <- filteredY[predidx, yCols[i]]
  }

  # Ranks are computed once and reused for the rank-based correlation.
  pred_rank <- rank(all_preds, na.last = "keep", ties.method = "min")
  true_rank <- rank(all_true, na.last = "keep", ties.method = "min")

  pred_cor[yCols[i], "pearson.cor.pred.true"] <- cor(all_preds, all_true)
  pred_cor[yCols[i], "spearman.cor.pred.true"] <- cor(all_preds, all_true, method = "spearman")
  pred_cor[yCols[i], "ranked.spearman.cor.pred.true"] <- cor(pred_rank, true_rank, method = "spearman")
}

pred_cor

##                pearson.cor.pred.true spearman.cor.pred.true
## IgG.PT.day14               0.3965526              0.4630631
## CCL3.day3                  0.7906531              0.5311535
## Monocytes.day1             0.5462973              0.6205882
##                ranked.spearman.cor.pred.true
## IgG.PT.day14                       0.4630631
## CCL3.day3                          0.5311535
## Monocytes.day1                     0.6205882

# typeof() reports the underlying storage type, which for a data frame is "list".
typeof(pred_cor)

## [1] "list"

For each model, can assess which features contribute to non-zero coefficients

Consider only choosing models for follow-on analysis that show good correlation scores

# Fit one cross-validated lasso per outcome on all complete rows and record
# the terms whose coefficient at lambda.min is non-zero.
size <- length(yCols)
all_models_coef <- vector(mode = "list", length = size)
all_models_names <- vector(mode = "list", length = size)
all_models <- vector(mode = "list", length = size)

for (i in seq_along(yCols)) {
  set.seed(1)
  filteredY <- na.omit(dataDf[yCols[i]])
  filteredX <- na.omit(dataDf[xCols])

  # Rows that are complete in both the outcome and the predictors.
  row_int <- intersect(rownames(filteredY), rownames(filteredX))

  # create lasso model
  # nfolds equals the number of rows used, i.e. leave-one-out cross-validation.
  cvfit_out <- suppressWarnings(
    cv.glmnet(
      x = as.matrix(filteredX[row_int, ]),
      y = as.matrix(filteredY[row_int, ]),
      family = "gaussian",
      alpha = 1,
      nfolds = length(row_int)
    )
  )
  plot(cvfit_out)

  # Extract the coefficients once and convert the sparse one-column result to
  # a plain named vector. Indexing the sparse matrix with a logical vector is
  # what triggers the "<sparse>[ <logic> ] ... maybe inefficient" messages.
  coef_min <- coef(cvfit_out, s = "lambda.min")
  coef_vec <- as.matrix(coef_min)[, 1]
  nonzero <- coef_vec != 0

  # Store values without names so downstream tables keep default row names.
  all_models_coef[[i]] <- unname(coef_vec[nonzero])
  all_models_names[[i]] <- rownames(coef_min)[nonzero]
}

## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient

# Label both result lists with the outcome each entry belongs to.
all_models_coef <- setNames(all_models_coef, yCols)
all_models_names <- setNames(all_models_names, yCols)

# Turn each outcome's non-zero terms into a two-column table (Variable,
# Coefficient), rounded to 3 decimals and sorted by absolute coefficient size.
# The columns are built directly with their own types; binding names and
# values with cbind() would coerce the coefficients to character first.
for (i in seq_len(size)) {
  all_models[[i]] <- data.frame(
    Variable = all_models_names[[i]],
    Coefficient = round(all_models_coef[[i]], 3),
    stringsAsFactors = FALSE
  ) %>%
    arrange(desc(abs(Coefficient)))
}
names(all_models) <- yCols

all_models

## $IgG.PT.day14
##      Variable Coefficient
## 1 (Intercept)       5.345
## 2      IgG.PT       0.650
## 
## $CCL3.day3
##      Variable Coefficient
## 1 (Intercept)       5.030
## 2        CCL3       0.926
## 3 infancy_vac      -0.623
## 
## $Monocytes.day1
##      Variable Coefficient
## 1 (Intercept)      13.213
## 2        CCL3      -1.238
## 3   Monocytes       0.877

# library(capture)
# setwd("/Users/rnili/Desktop/repo/gitLab/cmi-pb-multiomics-main/rasteh/models")
# Bundle the correlation table (first, unnamed element) with the per-outcome
# coefficient tables and print everything together.
a <- c(list(pred_cor), all_models)
# names(a)[1] <- 'pred_cor'
# sink("allModels_predCor_baseExposure_normalized.txt")
print(a)

## [[1]]
##                pearson.cor.pred.true spearman.cor.pred.true
## IgG.PT.day14               0.3965526              0.4630631
## CCL3.day3                  0.7906531              0.5311535
## Monocytes.day1             0.5462973              0.6205882
##                ranked.spearman.cor.pred.true
## IgG.PT.day14                       0.4630631
## CCL3.day3                          0.5311535
## Monocytes.day1                     0.6205882
## 
## $IgG.PT.day14
##      Variable Coefficient
## 1 (Intercept)       5.345
## 2      IgG.PT       0.650
## 
## $CCL3.day3
##      Variable Coefficient
## 1 (Intercept)       5.030
## 2        CCL3       0.926
## 3 infancy_vac      -0.623
## 
## $Monocytes.day1
##      Variable Coefficient
## 1 (Intercept)      13.213
## 2        CCL3      -1.238
## 3   Monocytes       0.877

# sink()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

baseModel

2023-05-08

add exposure table

All abtiter data have been log2-transformed. Why? To make them approximately normally distributed, since our GLM uses a Gaussian family as its distribution?

Test model quality

For each model, can assess which features contribute to non-zero coefficients