Two demo models are built to illustrate the model construction and submission file preparation process for the 3rd (public) CMI-PB challenge.

  • Model 1: Baseline (day 0) IgG antibody levels against PT as a predictor for the two antibody-level tasks

  • Model 2: Age as a predictor for all tasks

Load resources

library(lubridate)
library(tidyverse)
library(DT)
library(BiocStyle)


# Base directory; every input file is read from its data/ sub-folder.
base_dir <- "/home/pshinde/repos/cmi-pb/public_challenge/training_preprocess/"
data_dir <- paste0(base_dir, "data/")
#load(paste0(base_dir, "code/codebase1.R"))

# Submission template for the challenge.
submission_template <- readr::read_tsv(
  paste0(data_dir, "3rdChallengeSubmissionTemplate.tsv")
)

# Subject table of the 2023BD challenge dataset.
subject_2023BD <- readr::read_tsv(
  paste0(data_dir, "raw_datasets/challenge_dataset/2023BD_subject.tsv")
)

# Specimen table with the subject columns attached. No `by` is given, so the
# join uses whatever column names the two tables have in common.
specimen_2023BD <- left_join(
  readr::read_tsv(
    paste0(data_dir, "raw_datasets/challenge_dataset/2023BD_specimen.tsv")
  ),
  subject_2023BD
)

# Plasma antibody titer measurements of the 2023BD challenge dataset.
plasma_ab_titer_2023BD <- readr::read_tsv(
  paste0(data_dir, "raw_datasets/challenge_dataset/2023BD_plasma_ab_titer.tsv")
)

## More resources
# Subject descriptor columns of the template; they are carried into every
# submission table unchanged.
kept_columns <- dplyr::select(
  submission_template,
  "SubjectID", "Age", "BiologicalSexAtBirth", "VaccinePrimingStatus"
)

# Column names of the template, used further down to relabel the assembled
# submission tables.
submission_template_col_names <- colnames(submission_template)

Model 1: Baseline (day 0) IgG antibody levels against PT as a predictor for the two antibody-level tasks

1.1) Model Construction

# Rank the IgG-vs-PT measurement (MFI_normalised) taken at planned day 0.
# The titer rows are first annotated with specimen/subject columns (natural
# join on shared column names), then reduced to one rank per row.
IgG_baseline_model_df <- plasma_ab_titer_2023BD %>%
  left_join(specimen_2023BD) %>%
  dplyr::filter(
    isotype == "IgG",
    antigen == "PT",
    planned_day_relative_to_boost == 0
  ) %>%
  dplyr::transmute(
    SubjectID = subject_id,
    IgG_baseline_rank = rank(MFI_normalised)
  )

# Use the same baseline rank as the prediction for both task1 columns.
IgG_baseline_final_model_df <- IgG_baseline_model_df %>%
  dplyr::transmute(
    SubjectID,
    task1_1 = IgG_baseline_rank,
    task1_2 = IgG_baseline_rank
  )


datatable(IgG_baseline_model_df)

1.2) Prepare submission file

# Template columns for tasks 2.x and 3.x. This model makes no prediction for
# them, so they are carried into the submission exactly as the template has
# them.
kept_columns_v2 <- submission_template %>%
  dplyr::select(
    "SubjectID",
    "2.1) Monocytes-D1-Rank",
    "2.2) Monocytes-D1-FC-Rank",
    "3.1) CCL3-D3-Rank",
    "3.2) CCL3-D3-FC-Rank"
  )

# Assemble the submission table: subject descriptor columns, then the IgG
# baseline rank for task1_1 / task1_2, then the untouched template columns.
IgG_model_submission_df <- kept_columns %>%
  left_join(IgG_baseline_final_model_df, by = "SubjectID") %>%
  left_join(kept_columns_v2, by = "SubjectID")

# The relabelling below is purely positional, so verify the layout first:
# (1) the table must have exactly as many columns as the template, and
# (2) every column that already carries a template name must sit at the
#     position the template gives it. Otherwise predictions would end up
#     under the wrong task header without any error.
if (ncol(IgG_model_submission_df) != length(submission_template_col_names)) {
  stop(
    "IgG submission has ", ncol(IgG_model_submission_df),
    " columns but the template has ", length(submission_template_col_names),
    call. = FALSE
  )
}
igg_shared_cols <- intersect(
  names(IgG_model_submission_df),
  submission_template_col_names
)
if (!identical(
  match(igg_shared_cols, names(IgG_model_submission_df)),
  match(igg_shared_cols, submission_template_col_names)
)) {
  stop(
    "IgG submission columns are not in the template's column order",
    call. = FALSE
  )
}

# Give the table the exact column headers of the submission template.
colnames(IgG_model_submission_df) <- submission_template_col_names

# View the completed submission table
datatable(IgG_model_submission_df)

Model 2: Age as a predictor for all tasks

2.1) Model Construction

## Construct the model
# Add each subject's age at boost (in years, from year_of_birth to
# date_of_boost) and the rank of that age across subjects.
subject_2023BD <- subject_2023BD %>%
  mutate(
    age_at_boost = interval(ymd(year_of_birth), ymd(date_of_boost)) / years(1),
    age_rank = rank(age_at_boost)
  )

# One row per subject: identifier (renamed to the template's key) and age rank.
age_model_df <- subject_2023BD %>%
  dplyr::transmute(SubjectID = subject_id, age_rank)

# Use the age rank as the prediction for every task column.
age_final_model_df <- age_model_df %>%
  dplyr::transmute(
    SubjectID,
    task1_1 = age_rank,
    task1_2 = age_rank,
    task2_1 = age_rank,
    task2_2 = age_rank,
    task3_1 = age_rank,
    task3_2 = age_rank
  )

datatable(age_model_df)
datatable(age_final_model_df)

2.2) Prepare submission file

# Assemble the submission table: subject descriptor columns followed by the
# age-rank prediction columns (task1_1 ... task3_2).
age_model_submission_df <- kept_columns %>%
  left_join(age_final_model_df, by = "SubjectID")

# The relabelling below is purely positional, so verify the layout first:
# (1) the table must have exactly as many columns as the template, and
# (2) every column that already carries a template name must sit at the
#     position the template gives it. Otherwise predictions would end up
#     under the wrong task header without any error.
if (ncol(age_model_submission_df) != length(submission_template_col_names)) {
  stop(
    "Age submission has ", ncol(age_model_submission_df),
    " columns but the template has ", length(submission_template_col_names),
    call. = FALSE
  )
}
age_shared_cols <- intersect(
  names(age_model_submission_df),
  submission_template_col_names
)
if (!identical(
  match(age_shared_cols, names(age_model_submission_df)),
  match(age_shared_cols, submission_template_col_names)
)) {
  stop(
    "Age submission columns are not in the template's column order",
    call. = FALSE
  )
}

# Give the table the exact column headers of the submission template.
colnames(age_model_submission_df) <- submission_template_col_names

# View the completed submission table
datatable(age_model_submission_df)
#readr::write_tsv(age_model_submission_df, paste0(data_dir, "myAgeModel_submission_v20240810.tsv"))