Two demo models are provided to illustrate the model construction and submission file preparation process for the 3rd (public) CMI-PB challenge.

  • Model 1: Baseline (day 0) IgG antibody titer against PT as the predictor for the two antibody titer prediction tasks

  • Model 2: Age as predictor for all prediction tasks

Load resources

library(lubridate)
library(tidyverse)
library(DT)
library(BiocStyle)


# ---- Paths ----
# Root of the local checkout and the data folder beneath it. Both strings end
# in "/" so that file names can be appended directly with paste0().
base_dir <- "/home/pramod/Documents/GitHub/gitlab/submission_pipeline_demo/challenge3/"
data_dir <- paste0(base_dir, "data/")
# Shared helper code is not loaded in this demo (left disabled):
#load(paste0(base_dir, "code/codebase1.R"))

# ---- Inputs ----
# Submission template (TSV) and the harmonized master data object (RDS)
submission_template <- readr::read_tsv(
  paste0(data_dir, "3rdChallengeSubmissionTemplate_revised.tsv")
)
master_harmonized_data <- readr::read_rds(
  paste0(data_dir, "master_harmonized_data.RDS")
)

# Tables pulled from the `challenge` element of the master data object:
# the subject/specimen table and the long-format plasma antibody levels
subject_specimen_2023BD <- master_harmonized_data$challenge$subject_specimen
plasma_ab_titer_2023BD <- master_harmonized_data$challenge$plasma_antibody_levels$long

## More resources
# Descriptor columns carried over from the template unchanged into every
# submission table
kept_columns <- dplyr::select(
  submission_template,
  SubjectID, Age, BiologicalSexAtBirth, VaccinePrimingStatus
)

# Complete, ordered vector of the template's column names; it is re-applied
# to each finished submission table so the headers match the template
submission_template_col_names <- colnames(submission_template)

Model 1: Baseline (day 0) IgG antibody titer against PT as predictor of Ab titer tasks

1.1) Model Construction

# Rank the day-0 IgG_PT measurements (MFI_normalised), one row per matching
# antibody record.
# NOTE: the join has no explicit `by`, so dplyr matches on every column the
# two tables have in common.
IgG_baseline_model_df <- plasma_ab_titer_2023BD %>%
  left_join(subject_specimen_2023BD) %>%
  filter(
    isotype_antigen == "IgG_PT",
    planned_day_relative_to_boost == 0
  ) %>%
  dplyr::select(SubjectID = subject_id, MFI_normalised) %>%
  mutate(IgG_baseline_rank = rank(MFI_normalised)) %>%
  dplyr::select(-MFI_normalised)

# The same baseline rank is used as the prediction for both tasks 1.1 and 1.2
IgG_baseline_final_model_df <- IgG_baseline_model_df %>%
  rename(task1_1 = IgG_baseline_rank) %>%
  mutate(task1_2 = task1_1)


# Interactive tables for inspecting the intermediate and final model output
datatable(IgG_baseline_model_df)
datatable(IgG_baseline_final_model_df)

1.2) Prepare submission file

# Template columns for the tasks this model does not predict (2.1 through 4.1),
# taken from the template as-is together with the SubjectID key
kept_columns_v2 <- submission_template %>%
  dplyr::select(
    SubjectID,
    all_of(c(
      "2.1) Monocytes-D1-Rank",
      "2.2) Monocytes-D1-FC-Rank",
      "3.1) CCL3-D3-Rank",
      "3.2) CCL3-D3-FC-Rank",
      "4.1) IFNG/IL5-PolarizationRatio-D28-Rank"
    ))
  )

# Assemble the submission: descriptor columns, then the IgG-based ranks for
# tasks 1.1/1.2, then the untouched template columns for the remaining tasks
IgG_model_submission_df <- list(
  kept_columns,
  IgG_baseline_final_model_df,
  kept_columns_v2
) %>%
  purrr::reduce(left_join, by = "SubjectID")

# Restore the template's headers; names are assigned by position, so the
# column order built above must match the template's order
names(IgG_model_submission_df) <- submission_template_col_names

# View the finished submission table
datatable(IgG_model_submission_df)
#readr::write_tsv(IgG_model_submission_df, paste0(data_dir, "myIgGModel_submission_v20240927.tsv"))

Model 2: Age as predictor for all tasks

2.1) Model Construction

## Construct the model
# One row per distinct (subject, birth date, boost date) combination, with the
# age at boost in years and the rank of that age across all rows
subject_2023BD <- subject_specimen_2023BD %>%
  select(subject_id, year_of_birth, date_of_boost) %>%
  distinct() %>%
  mutate(
    age_at_boost =
      interval(ymd(year_of_birth), ymd(date_of_boost)) / years(1),
    age_rank = rank(age_at_boost)
  )

# Keep only the key (renamed to match the submission template) and the rank
age_model_df <- subject_2023BD %>%
  dplyr::select(SubjectID = subject_id, age_rank)

# The age rank is used as the prediction for every task column
age_final_model_df <- age_model_df %>%
  rename(task1_1 = age_rank) %>%
  mutate(
    task1_2 = task1_1,
    task2_1 = task1_1,
    task2_2 = task1_1,
    task3_1 = task1_1,
    task3_2 = task1_1,
    task4_1 = task1_1
  )

# Interactive tables for inspecting the intermediate and final model output
datatable(age_model_df)
datatable(age_final_model_df)

2.2) Prepare submission file

# Attach the age-based task columns to the descriptor columns, matching rows
# on SubjectID
age_model_submission_df <- left_join(
  kept_columns,
  age_final_model_df,
  by = "SubjectID"
)

# Restore the template's headers; names are assigned by position, so the
# column order built above must match the template's order
names(age_model_submission_df) <- submission_template_col_names

# View the finished submission table
datatable(age_model_submission_df)
#readr::write_tsv(age_model_submission_df, paste0(data_dir, "myAgeModel_submission_v20240927.tsv"))
# Record the R and package versions used for this run
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Los_Angeles
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.33          forcats_1.0.0    stringr_1.5.1    dplyr_1.1.4     
##  [5] purrr_1.0.2      readr_2.1.5      tidyr_1.3.1      tibble_3.2.1    
##  [9] ggplot2_3.5.1    tidyverse_2.0.0  lubridate_1.9.3  BiocStyle_2.32.1
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9          utf8_1.2.4          generics_0.1.3     
##  [4] stringi_1.8.4       hms_1.1.3           digest_0.6.37      
##  [7] magrittr_2.0.3      evaluate_1.0.0      grid_4.4.1         
## [10] timechange_0.3.0    bookdown_0.40       fastmap_1.2.0      
## [13] jsonlite_1.8.9      BiocManager_1.30.25 fansi_1.0.6        
## [16] crosstalk_1.2.1     scales_1.3.0        jquerylib_0.1.4    
## [19] cli_3.6.3           crayon_1.5.3        rlang_1.1.4        
## [22] bit64_4.5.2         munsell_0.5.1       withr_3.0.1        
## [25] cachem_1.1.0        yaml_2.3.10         parallel_4.4.1     
## [28] tools_4.4.1         tzdb_0.4.0          colorspace_2.1-1   
## [31] vctrs_0.6.5         R6_2.5.1            lifecycle_1.0.4    
## [34] bit_4.5.0           htmlwidgets_1.6.4   vroom_1.6.5        
## [37] pkgconfig_2.0.3     pillar_1.9.0        bslib_0.8.0        
## [40] gtable_0.3.5        glue_1.7.0          xfun_0.47          
## [43] tidyselect_1.2.1    rstudioapi_0.16.0   knitr_1.48         
## [46] htmltools_0.5.8.1   rmarkdown_2.28      compiler_4.4.1