Two demo models are created to illustrate the model construction and submission file preparation process for the 3rd (Public) CMI-PB challenge.
Model 1: Baseline (day 0) IgG antibody titer against PT as the predictor for the two antibody titer prediction tasks
Model 2: Age as predictor for all prediction tasks
library(lubridate)
library(tidyverse)
library(DT)
library(BiocStyle)
# Input locations. Both directories end in "/" because every path below is
# assembled with paste0(), which inserts no separator of its own.
base_dir <- "/home/pramod/Documents/GitHub/gitlab/submission_pipeline_demo/challenge3/"
data_dir <- paste0(base_dir, "data/")
# Disabled in the original script:
# load(paste0(base_dir, "code/codebase1.R"))

# Submission template: read from a TSV file in the data directory.
submission_template <- data_dir %>%
  paste0("3rdChallengeSubmissionTemplate_revised.tsv") %>%
  readr::read_tsv()

# Harmonized data object: read from an RDS file in the data directory.
master_harmonized_data <- data_dir %>%
  paste0("master_harmonized_data.RDS") %>%
  readr::read_rds()

# Pull the two challenge tables used by the models below.
# Disabled in the original script:
# subject_2023BD = master_harmonized_data$challenge$subject_specimen
challenge_data <- master_harmonized_data$challenge
subject_specimen_2023BD <- challenge_data$subject_specimen
plasma_ab_titer_2023BD <- challenge_data$plasma_antibody_levels$long
## More resources
# Subject-level columns copied from the template without modification.
kept_columns <- dplyr::select(
  submission_template,
  SubjectID, Age, BiologicalSexAtBirth, VaccinePrimingStatus
)
# Full list of template column names, used later to relabel the assembled
# submission tables.
submission_template_col_names <- colnames(submission_template)
# Model 1: rank subjects by their day-0 IgG_PT antibody level.
# No `by` is supplied to the join, so dplyr matches on every column the two
# tables have in common (NOTE(review): presumably specimen_id - confirm).
IgG_baseline_model_df <- plasma_ab_titer_2023BD %>%
  dplyr::left_join(subject_specimen_2023BD) %>%
  dplyr::filter(
    isotype_antigen == "IgG_PT",
    planned_day_relative_to_boost == 0
  ) %>%
  dplyr::select(SubjectID = subject_id, MFI_normalised) %>%
  # rank() is applied across all remaining rows with its default settings.
  dplyr::mutate(IgG_baseline_rank = rank(MFI_normalised)) %>%
  dplyr::select(-MFI_normalised)

# The same baseline rank is used as the prediction for both task 1 columns.
IgG_baseline_final_model_df <- IgG_baseline_model_df %>%
  dplyr::select(SubjectID, task1_1 = IgG_baseline_rank) %>%
  dplyr::mutate(task1_2 = task1_1)

# Interactive previews of the rank table and the per-task prediction table.
DT::datatable(IgG_baseline_model_df)
DT::datatable(IgG_baseline_final_model_df)
# Template columns for tasks 2.1 through 4.1. Model 1 does not predict these,
# so they are taken from the template as-is.
kept_columns_v2 <- dplyr::select(
  submission_template,
  SubjectID,
  "2.1) Monocytes-D1-Rank",
  "2.2) Monocytes-D1-FC-Rank",
  "3.1) CCL3-D3-Rank",
  "3.2) CCL3-D3-FC-Rank",
  "4.1) IFNG/IL5-PolarizationRatio-D28-Rank"
)

# Assemble the submission: subject columns, then the task 1 predictions, then
# the untouched task 2-4 template columns, all matched on SubjectID.
IgG_model_submission_df <- dplyr::left_join(
  kept_columns, IgG_baseline_final_model_df,
  by = "SubjectID"
)
IgG_model_submission_df <- dplyr::left_join(
  IgG_model_submission_df, kept_columns_v2,
  by = "SubjectID"
)

# Relabel the columns with the template's names. This assignment is positional,
# so it relies on the join order above matching the template's column order.
names(IgG_model_submission_df) <- submission_template_col_names

# Preview the finished submission table.
DT::datatable(IgG_model_submission_df)
# Writing the submission file is disabled in the original script:
# readr::write_tsv(IgG_model_submission_df, paste0(data_dir, "myIgGModel_submission_v20240927.tsv"))
## Construct the model
# Model 2: rank subjects by their age at the time of the boost.
# Reduce the specimen table to one row per distinct
# (subject_id, year_of_birth, date_of_boost) combination, compute the age in
# years between the two dates, then rank that age.
subject_2023BD <- subject_specimen_2023BD %>%
  dplyr::select(subject_id, year_of_birth, date_of_boost) %>%
  dplyr::distinct() %>%
  dplyr::mutate(
    age_at_boost = lubridate::interval(
      lubridate::ymd(year_of_birth),
      lubridate::ymd(date_of_boost)
    ) / lubridate::years(1),
    age_rank = rank(age_at_boost)
  )

# Keep only the subject identifier (renamed to the template's key) and rank.
age_model_df <- dplyr::select(subject_2023BD, SubjectID = subject_id, age_rank)

# The age rank is used as the prediction for every task column.
age_final_model_df <- age_model_df %>%
  dplyr::select(SubjectID, task1_1 = age_rank) %>%
  dplyr::mutate(
    task1_2 = task1_1,
    task2_1 = task1_1,
    task2_2 = task1_1,
    task3_1 = task1_1,
    task3_2 = task1_1,
    task4_1 = task1_1
  )

# Interactive previews of the rank table and the per-task prediction table.
DT::datatable(age_model_df)
DT::datatable(age_final_model_df)
# Assemble the submission: subject columns followed by the age-rank predictions
# for all tasks, matched on SubjectID.
age_model_submission_df <- dplyr::left_join(
  kept_columns, age_final_model_df,
  by = "SubjectID"
)

# Relabel the columns with the template's names. This assignment is positional,
# so it relies on the task columns being in the template's column order.
names(age_model_submission_df) <- submission_template_col_names

# Preview the finished submission table.
DT::datatable(age_model_submission_df)
# Writing the submission file is disabled in the original script:
# readr::write_tsv(age_model_submission_df, paste0(data_dir, "myAgeModel_submission_v20240927.tsv"))
# Print the R version, platform, and attached/loaded package versions used for
# this run, so the analysis environment is recorded alongside the results.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/Los_Angeles
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.33 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [5] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [9] ggplot2_3.5.1 tidyverse_2.0.0 lubridate_1.9.3 BiocStyle_2.32.1
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3
## [4] stringi_1.8.4 hms_1.1.3 digest_0.6.37
## [7] magrittr_2.0.3 evaluate_1.0.0 grid_4.4.1
## [10] timechange_0.3.0 bookdown_0.40 fastmap_1.2.0
## [13] jsonlite_1.8.9 BiocManager_1.30.25 fansi_1.0.6
## [16] crosstalk_1.2.1 scales_1.3.0 jquerylib_0.1.4
## [19] cli_3.6.3 crayon_1.5.3 rlang_1.1.4
## [22] bit64_4.5.2 munsell_0.5.1 withr_3.0.1
## [25] cachem_1.1.0 yaml_2.3.10 parallel_4.4.1
## [28] tools_4.4.1 tzdb_0.4.0 colorspace_2.1-1
## [31] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4
## [34] bit_4.5.0 htmlwidgets_1.6.4 vroom_1.6.5
## [37] pkgconfig_2.0.3 pillar_1.9.0 bslib_0.8.0
## [40] gtable_0.3.5 glue_1.7.0 xfun_0.47
## [43] tidyselect_1.2.1 rstudioapi_0.16.0 knitr_1.48
## [46] htmltools_0.5.8.1 rmarkdown_2.28 compiler_4.4.1