The training dataset for the Public challenge comprises three multi-omics datasets (designated as 2020, 2021 and 2022) and challenge dataset (2023) that require processing and normalization to generate computable matrices suitable for subsequent model development. While the procedures for data processing and normalization are inherently user-specific, the CMI-PB team has devised a data processing method, drawing inspiration from the approach used in the 2nd CMI-PB challenge. The codebase is also available on GitHub at [https://github.com/CMI-PB/cmi-pb-3rd-public-challenge-data-prep]. If you have specific questions, please contact us via Solutions center.
The data files for the Public CMI-PB challenge can be accessed at [https://www.cmi-pb.org/downloads/cmipb_challenge_datasets/current/3rd_challenge/raw_datasets/]. They are available for direct file download or as R data objects. For our notebook, we chose to download the data as R data objects. These objects contain both demographical metadata of subjects and experimental data, including:
were performed before and after booster vaccination.
base_dir = "/home/pshinde/repos/cmi-pb/public_challenge/training_preprocess/"
#base_dir = "../"
dir_raw_training <- paste0(base_dir, "data/raw_training_dataset/")
dir_raw_prediction <- paste0(base_dir, "data/raw_challenge_dataset/")
dir_RDS_objects <- paste0(base_dir, "data/output/")
## `codebase.R` installs required packages and all house keeping functions
source(paste0(base_dir, "scripts/codebase.R"))
master_database_data <- readRDS(paste0(dir_RDS_objects, "master_harmonized_data_v20240825.RDS"))
#training_dataset <- subset_dataset(master_database_data$training, c("2020_dataset", "2021_dataset", "2022_dataset"))
training_dataset <- master_database_data$training
challenge_dataset <- master_database_data$challenge
#subject_specimen <- master_database_data$subject_specimen %>%
# mutate(timepoint = planned_day_relative_to_boost)
training_subject_specimen <- training_dataset$subject_specimen %>%
select(c("specimen_id","subject_id","dataset","timepoint","infancy_vac","biological_sex","date_of_boost"))
challenge_subject_specimen <- challenge_dataset$subject_specimen %>%
select(c("specimen_id","subject_id","dataset","timepoint","infancy_vac","biological_sex","date_of_boost"))
subject_specimen = training_subject_specimen %>%
rbind(challenge_subject_specimen)
gene_90_38_export <- read_tsv(paste0(base_dir, "data/gene_90_38_export.tsv"))
mito_genes <- gene_90_38_export %>%
filter(substr(display_label, 1,3) == "MT-")
gene_90_38_shortlist <- gene_90_38_export %>%
filter(biotype == "protein_coding") %>%
filter(!versioned_ensembl_gene_id %in% mito_genes$versioned_ensembl_gene_id)
batch.factors = c("timepoint","infancy_vac","biological_sex","dataset")
data_obj = training_dataset
challenge_subject_specimen_baseline <- subject_specimen %>%
filter(dataset %in% c("2023_dataset")) %>%
filter(timepoint <= 40)
subject_specimen_baseline <- subject_specimen %>%
filter(timepoint <= 40)
batch.factors = c("timepoint","dataset")
tcellpol_wide_before_wide <- training_dataset$t_cell_polarization$wide %>%
rbind(challenge_dataset$t_cell_polarization$wide) %>%
select(-c("DMSO_P01579", "DMSO_Q16552", "DMSO_P05113"))
# Calculate the percentage of missing values for each column
missing_percent <- colMeans(is.na(tcellpol_wide_before_wide)) * 100
# Identify columns with more than 80% missing values
columns_with_high_na <- which(missing_percent > 80)
tcellpol_wide_before_wide = tcellpol_wide_before_wide %>%
filter(!specimen_id %in% c(1159)) %>%
column_to_rownames("specimen_id")%>%
t()
tcellpol_wide_before_wide_imputed = tcellpol_wide_before_wide[rowMeans(is.na(tcellpol_wide_before_wide)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(tcellpol_wide_before_wide_imputed, subject_specimen, batch.factors, plot_title = "T cell polarization: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
tcell_polarization_processed_data = list(
raw_data = tcellpol_wide_before_wide,
normalized_data = "Note: normalized_data data was not included, raw_data shows not batch effects. We suggest using raw_data data for t_cell_polarization assay",
batchCorrected_data = "Note: Batch-corrected data was not included, raw_data shows not batch effects. We suggest using raw_data data for t_cell_polarization assay"
)
batch.factors = c("timepoint","dataset")
tcell_activation_wide_before_wide <- training_dataset$t_cell_activation$wide %>%
rbind(challenge_dataset$t_cell_activation$wide) %>%
filter(specimen_id %in% subject_specimen$specimen_id) %>%
select(-c("DMSO")) %>%
column_to_rownames("specimen_id")%>%
t()
# Calculate the percentage of missing values for each column
missing_percent <- colMeans(is.na(tcell_activation_wide_before_wide)) * 100
# Identify columns with more than 80% missing values
columns_with_high_na <- which(missing_percent > 80)
pvca_analysis(tcell_activation_wide_before_wide, subject_specimen, batch.factors, plot_title = "T cell Activation: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
tcell_activation_processed_data = list(
raw_data = tcell_activation_wide_before_wide,
normalized_data = "Note: normalized_data data was not included, raw_data show no dataset/batch effects. We suggest using raw_data data for t_cell_activation assay",
batchCorrected_data = "Note: Batch-corrected data was not included, raw_data show no dataset/batch effects. We suggest using raw_data data for t_cell_activation assay"
)
abtiter_wide_before <- training_dataset$plasma_antibody_levels$wide %>%
rbind(challenge_dataset$plasma_antibody_levels$wide) %>%
filter(specimen_id %in% subject_specimen$specimen_id) %>%
column_to_rownames("specimen_id")%>%
t()
abtiter_wide_before_long <- training_dataset$plasma_antibody_levels$long %>%
rbind(challenge_dataset$plasma_antibody_levels$long)
pvca_analysis(abtiter_wide_before, subject_specimen, batch.factors, plot_title = "Plasma Antibody titer: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
ab_data_obj = list(
plasma_antibody_levels = list(
long = abtiter_wide_before_long,
wide = abtiter_wide_before
),
subject_specimen = subject_specimen
)
## Apply data normalization and batch correction
abtiter_data_processed = processAbtiter(ab_data_obj, BatchCorrection = TRUE)
## Joining with `by = join_by(specimen_id)`
## Found4batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
pvca_analysis(abtiter_data_processed$normalized_data, subject_specimen, batch.factors, plot_title = "Plasma Antibody titer: Normalization")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
pvca_analysis(abtiter_data_processed$batchCorrected_data, subject_specimen, batch.factors, plot_title = "Plasma Antibody titer: Normalization and batch effect correction")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
abtiter_processed_data = list(
#metadata = rnaseq_metaData,
raw_data = abtiter_data_processed$raw_data,
normalized_data = abtiter_data_processed$normalized_data,
batchCorrected_data = abtiter_data_processed$batchCorrected_data
)
## Before normalization
cell_wide_before_wide <- training_dataset$pbmc_cell_frequency$wide %>%
filter(specimen_id %in% subject_specimen$specimen_id) %>%
rbind(challenge_dataset$pbmc_cell_frequency$wide) %>%
distinct() %>%
column_to_rownames("specimen_id")%>%
t()
pvca_analysis(cell_wide_before_wide, subject_specimen, batch.factors, plot_title = "PBMC Cell frequency: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Normalized
cell_wide_before_long <- cell_wide_before_wide %>%
as.data.frame() %>%
rownames_to_column("cell_type_name") %>%
pivot_longer(!cell_type_name, names_to = "specimen_id", values_to = "percent_live_cell") %>%
mutate(specimen_id = as.numeric(specimen_id))
cell_data_obj = list(
pbmc_cell_frequency = list(
long = cell_wide_before_long,
wide = cell_wide_before_wide
),
subject_specimen = subject_specimen %>%
mutate(planned_day_relative_to_boost = timepoint)
)
## Assemble data
count_data_long = cell_wide_before_long
df_subject_specimen = subject_specimen %>%
mutate(planned_day_relative_to_boost = timepoint)
## Perform median normalization
cytof_median_D0 <- count_data_long %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
filter(specimen_id %in% unique(df_subject_specimen[df_subject_specimen$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(dataset, cell_type_name) %>%
summarise(median = median(percent_live_cell, na.rm = T))
## Joining with `by = join_by(specimen_id)`
## `summarise()` has grouped output by 'dataset'. You can override using the
## `.groups` argument.
cell_long_normalized_pre <- count_data_long %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
left_join(cytof_median_D0) %>%
mutate(percent_live_cell_normalized = if_else(is.na(percent_live_cell) == T, NA, percent_live_cell/median))
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(cell_type_name, dataset)`
## Reshape dataframe in wide format
cell_wide_normalized_pre <- cell_long_normalized_pre %>%
dplyr::select(cell_type_name, specimen_id, percent_live_cell_normalized) %>%
pivot_wider(names_from = "cell_type_name", values_from = percent_live_cell_normalized) %>%
column_to_rownames("specimen_id")%>%
t()
cellFreq_normalized_imputed = cell_wide_normalized_pre[rowMeans(is.na(cell_wide_normalized_pre)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cellFreq_normalized_imputed, subject_specimen, batch.factors, plot_title = "PBMC Cell frequency: Normalized data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
## Batch correction
batch_lebels = as.data.frame(colnames(cellFreq_normalized_imputed)) %>%
rename(specimen_id = starts_with("colnames")) %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(subject_specimen) %>%
dplyr::select(dataset)
## Joining with `by = join_by(specimen_id)`
cellFreq_batchCorrected = ComBat(cellFreq_normalized_imputed, batch = batch_lebels$dataset)
## Found4batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
cellFreq_batchCorrected_imputed = cellFreq_batchCorrected[rowMeans(is.na(cellFreq_batchCorrected)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cellFreq_batchCorrected_imputed, subject_specimen, batch.factors, plot_title = "PBMC Cell frequency: Normalization and batch effect correction")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
cell_frequency_processed_data = list(
raw_data = cell_wide_before_wide,
normalized_data = cellFreq_normalized_imputed,
batchCorrected_data = cellFreq_batchCorrected_imputed
)
## Before batch correction
olink_wide_before <- training_dataset$plasma_cytokine_concentrations_by_olink$wide %>%
rbind(challenge_dataset$plasma_cytokine_concentrations_by_olink$wide) %>%
filter(specimen_id %in% subject_specimen$specimen_id) %>%
column_to_rownames("specimen_id")%>%
t()
pvca_analysis(olink_wide_before, subject_specimen, batch.factors, plot_title = "Cytokine concetrations By Olink assay: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
olink_wide_before_long <- olink_wide_before %>%
as.data.frame() %>%
rownames_to_column("protein_id") %>%
pivot_longer(!protein_id, names_to = "specimen_id", values_to = "concentration") %>%
mutate(specimen_id = as.numeric(specimen_id))
## Assemble data
count_data_long_olink = olink_wide_before_long
df_subject_specimen = subject_specimen %>%
mutate(planned_day_relative_to_boost = timepoint)
## Perform median normalization
cytokine_median_D0_olink <- count_data_long_olink %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
filter(specimen_id %in% unique(df_subject_specimen[df_subject_specimen$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(dataset, protein_id) %>%
summarise(median = median(concentration, na.rm = T))
## Joining with `by = join_by(specimen_id)`
## `summarise()` has grouped output by 'dataset'. You can override using the
## `.groups` argument.
cytokine_long_normalized_pre_olink <- count_data_long_olink %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
left_join(cytokine_median_D0_olink) %>%
mutate(concentration_normalized = if_else(is.na(concentration) == T, NA, concentration/median))
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(protein_id, dataset)`
## Reshape dataframe in wide format
cytokine_wide_normalized_pre_olink <- cytokine_long_normalized_pre_olink %>%
dplyr::select(protein_id, specimen_id, concentration_normalized) %>%
pivot_wider(names_from = "protein_id", values_from = concentration_normalized) %>%
column_to_rownames("specimen_id")%>%
t()
cytokineFreq_normalized_imputed_olink = cytokine_wide_normalized_pre_olink[rowMeans(is.na(cytokine_wide_normalized_pre_olink)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cytokineFreq_normalized_imputed_olink, subject_specimen, batch.factors, plot_title = "Cytokine concetrations By OLINK: Normalization")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
batch_lebels = as.data.frame(colnames(cytokineFreq_normalized_imputed_olink)) %>%
rename(specimen_id = starts_with("colnames")) %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(df_subject_specimen) %>%
dplyr::select(dataset)
## Joining with `by = join_by(specimen_id)`
cytokineFreq_batchCorrected_olink = ComBat(cytokineFreq_normalized_imputed_olink, batch = batch_lebels$dataset)
## Found 3 genes with uniform expression within a single batch (all zeros); these will not be adjusted for batch.
## Found4batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
cytokineFreq_batchCorrected_imputed_olink = cytokineFreq_batchCorrected_olink[rowMeans(is.na(cytokineFreq_batchCorrected_olink)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cytokineFreq_batchCorrected_imputed_olink, subject_specimen, batch.factors, plot_title = "Cytokine concetrations By OLINK: Normalization and batch effect correction")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
olink_processed_data = list(
raw_data = olink_wide_before,
normalized_data = cytokineFreq_normalized_imputed_olink,
batchCorrected_data = cytokineFreq_batchCorrected_imputed_olink
)
#subject_specimen_legendplex = data_obj$subject_specimen %>%
# filter(!dataset %in% c("2020_dataset"))
## Before batch correction
legendplex_wide_before <- training_dataset$plasma_cytokine_concentrations_by_legendplex$wide %>%
rbind(challenge_dataset$plasma_cytokine_concentrations_by_legendplex$wide) %>%
column_to_rownames("specimen_id")%>%
t()
pvca_analysis(legendplex_wide_before, subject_specimen, batch.factors, plot_title = "Cytokine concentrations By legendplex assay: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
legendplex_wide_before_long <- legendplex_wide_before %>%
as.data.frame() %>%
rownames_to_column("protein_id") %>%
pivot_longer(!protein_id, names_to = "specimen_id", values_to = "concentration") %>%
mutate(specimen_id = as.numeric(specimen_id))
## Assemble data
count_data_long = legendplex_wide_before_long
df_subject_specimen = subject_specimen %>%
mutate(planned_day_relative_to_boost = timepoint)
## Perform median normalization
cytokine_median_D0 <- count_data_long %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
filter(specimen_id %in% unique(df_subject_specimen[df_subject_specimen$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(dataset, protein_id) %>%
summarise(median = median(concentration, na.rm = T))
## Joining with `by = join_by(specimen_id)`
## `summarise()` has grouped output by 'dataset'. You can override using the
## `.groups` argument.
cytokine_long_normalized_pre <- count_data_long %>%
left_join(df_subject_specimen[c("specimen_id", "dataset")]) %>%
left_join(cytokine_median_D0) %>%
mutate(concentration_normalized = if_else(is.na(concentration) == T, NA, concentration/median))
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(protein_id, dataset)`
## Reshape dataframe in wide format
cytokine_wide_normalized_pre <- cytokine_long_normalized_pre %>%
dplyr::select(protein_id, specimen_id, concentration_normalized) %>%
pivot_wider(names_from = "protein_id", values_from = concentration_normalized) %>%
column_to_rownames("specimen_id")%>%
t()
cytokineFreq_normalized_imputed = cytokine_wide_normalized_pre[rowMeans(is.na(cytokine_wide_normalized_pre)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cytokineFreq_normalized_imputed, subject_specimen, batch.factors, plot_title = "Cytokine concentrations By legendplex assay: Raw data")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
## Apply data normalization and batch correction
batch_lebels = as.data.frame(colnames(cytokineFreq_normalized_imputed)) %>%
rename(specimen_id = starts_with("colnames")) %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(df_subject_specimen) %>%
dplyr::select(dataset)
## Joining with `by = join_by(specimen_id)`
cytokineFreq_batchCorrected = ComBat(cytokineFreq_normalized_imputed, batch = batch_lebels$dataset)
## Found3batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
cytokineFreq_batchCorrected_imputed = cytokineFreq_batchCorrected[rowMeans(is.na(cytokineFreq_batchCorrected)) < 1, ] %>%
as.matrix() %>%
impute.knn() %>%
.$data
pvca_analysis(cytokineFreq_batchCorrected_imputed, subject_specimen, batch.factors, plot_title = "Cytokine concetrations By legendplex: Normalization and batch effect correction")
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
## Note: Batch-corrected data was not included, as it resulted in over-correction and introduced unexpected variations.
legendplex_processed_data = list(
raw_data = legendplex_wide_before,
normalized_data = cytokineFreq_normalized_imputed,
batchCorrected_data = "Note: Batch-corrected data was not included, as it resulted in over-correction and introduced unexpected variations. We suggest using normalized_data data for legendplex assay"
)
rnaseq_countData <- training_dataset$pbmc_gene_expression$wide_raw_count %>%
rbind(challenge_dataset$pbmc_gene_expression$wide_raw_count) %>%
column_to_rownames("specimen_id") %>%
t() %>%
as.data.frame()
colnames(rnaseq_countData) = as.integer(colnames(rnaseq_countData))
rnaseq_metaData <- subject_specimen %>%
filter(specimen_id %in% colnames(rnaseq_countData)) %>%
mutate(specimen_id1 = specimen_id) %>%
column_to_rownames("specimen_id1")
#' Identify genes with rawcount >=1 that are present in at least 80% of either the aP(wP) cohort and absent in at least 80% of wP(aP) cohort.
threshold_proportion_greater_than_1 = 0.8
rawcount_sum_infancy_subgroup <- rnaseq_countData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
pivot_longer(!versioned_ensembl_gene_id, values_to = "rawcount", names_to = "specimen_id") %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(subject_specimen) %>%
group_by(dataset, versioned_ensembl_gene_id, infancy_vac) %>%
#group_by(versioned_ensembl_gene_id, infancy_vac) %>%
summarise(proportion_greater_than_1 = mean(rawcount >= 1)) %>%
pivot_wider(names_from = infancy_vac, values_from = proportion_greater_than_1) %>%
mutate(gene_meets_criterion_aP = aP >= threshold_proportion_greater_than_1 & wP <= (1 - threshold_proportion_greater_than_1),
gene_meets_criterion_wP = wP >= threshold_proportion_greater_than_1 & aP <= (1 - threshold_proportion_greater_than_1)
) %>%
filter((gene_meets_criterion_aP == TRUE & gene_meets_criterion_wP == FALSE) || (gene_meets_criterion_aP == FALSE & gene_meets_criterion_wP == TRUE))
## Joining with `by = join_by(specimen_id)`
## `summarise()` has grouped output by 'dataset', 'versioned_ensembl_gene_id'. You
## can override using the `.groups` argument.
#' Create a shortlist of genes (rawcount >= 1) in at least 30% of the specimens.
rawcount_shortlist <- rnaseq_countData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
pivot_longer(!versioned_ensembl_gene_id, values_to = "rawcount", names_to = "specimen_id") %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(subject_specimen) %>%
group_by(versioned_ensembl_gene_id) %>%
#group_by(versioned_ensembl_gene_id, infancy_vac) %>%
summarise(proportion = mean(rawcount >= 50)) %>%
filter(proportion >= 0.3)
## Joining with `by = join_by(specimen_id)`
## Before batch correction
rnaseq_countData_v2 <- rnaseq_countData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
filter(!versioned_ensembl_gene_id %in% rawcount_sum_infancy_subgroup$versioned_ensembl_gene_id) %>%
filter(versioned_ensembl_gene_id %in% rawcount_shortlist$versioned_ensembl_gene_id) %>%
column_to_rownames("versioned_ensembl_gene_id")
mad_2020 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2020_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2021 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2021_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2022 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2022_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2023 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2023_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_shotlisted_genes <- intersect(intersect(mad_2020$gene_id, mad_2021$gene_id), mad_2022$gene_id)
## mad_2023$gene_id genes were not included
rnaseq_countData_v3 <- rnaseq_countData_v2 %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% mad_shotlisted_genes) %>%
column_to_rownames("versioned_ensembl_gene_id")
pvca_analysis_rnaseq(rnaseq_countData_v3, subject_specimen, batch.factors, plot_title = "RNASeq Raw count: Raw data")
## Cluster size 6650 broken into 153 6497
## Done cluster 153
## Cluster size 6497 broken into 781 5716
## Done cluster 781
## Cluster size 5716 broken into 4101 1615
## Cluster size 4101 broken into 1183 2918
## Done cluster 1183
## Cluster size 2918 broken into 928 1990
## Done cluster 928
## Cluster size 1990 broken into 1171 819
## Done cluster 1171
## Done cluster 819
## Done cluster 1990
## Done cluster 2918
## Done cluster 4101
## Cluster size 1615 broken into 597 1018
## Done cluster 597
## Done cluster 1018
## Done cluster 1615
## Done cluster 5716
## Done cluster 6497
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
batch_labels = as.data.frame(colnames(rnaseq_countData_v3)) %>%
rename(specimen_id = starts_with("colnames")) %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(rnaseq_metaData) %>%
dplyr::select(dataset)
## Joining with `by = join_by(specimen_id)`
rnaseq_batchCorrected_rawcount = sva::ComBat_seq(as.matrix(rnaseq_countData_v3), batch = batch_labels$dataset)
## Found 4 batches
## Using null model in ComBat-seq.
## Adjusting for 0 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pvca_analysis_rnaseq(rnaseq_batchCorrected_rawcount, subject_specimen, batch.factors, plot_title = "RNASeq Raw count: Batch correction")
## Cluster size 6650 broken into 125 6525
## Done cluster 125
## Cluster size 6525 broken into 688 5837
## Done cluster 688
## Cluster size 5837 broken into 1660 4177
## Cluster size 1660 broken into 586 1074
## Done cluster 586
## Done cluster 1074
## Done cluster 1660
## Cluster size 4177 broken into 2300 1877
## Cluster size 2300 broken into 1115 1185
## Done cluster 1115
## Done cluster 1185
## Done cluster 2300
## Cluster size 1877 broken into 886 991
## Done cluster 886
## Done cluster 991
## Done cluster 1877
## Done cluster 4177
## Done cluster 5837
## Done cluster 6525
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
rnaseq_normalised_data_rawcount = list(
raw_data = as.matrix(rnaseq_countData_v3),
batchCorrected_data = rnaseq_batchCorrected_rawcount
)
rnaseq_tpmcountData <- training_dataset$pbmc_gene_expression$wide_tpm %>%
rbind(challenge_dataset$pbmc_gene_expression$wide_tpm) %>%
column_to_rownames("specimen_id") %>%
t() %>%
as.data.frame()
colnames(rnaseq_tpmcountData) = as.integer(colnames(rnaseq_tpmcountData))
rnaseq_metaData <- subject_specimen %>%
filter(specimen_id %in% colnames(rnaseq_tpmcountData)) %>%
mutate(specimen_id1 = specimen_id) %>%
column_to_rownames("specimen_id1")
#' Identify genes with tpm >=1 that are present in at least 80% of either the aP(wP) cohort and absent in at least 80% of wP(aP) cohort.
threshold_proportion_greater_than_1 = 0.8
tpm_sum_infancy_subgroup <- rnaseq_tpmcountData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
pivot_longer(!versioned_ensembl_gene_id, values_to = "tpm", names_to = "specimen_id") %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(subject_specimen) %>%
group_by(dataset, versioned_ensembl_gene_id, infancy_vac) %>%
#group_by(versioned_ensembl_gene_id, infancy_vac) %>%
summarise(proportion_greater_than_1 = mean(tpm >= 1)) %>%
pivot_wider(names_from = infancy_vac, values_from = proportion_greater_than_1) %>%
mutate(gene_meets_criterion_aP = aP >= threshold_proportion_greater_than_1 & wP <= (1 - threshold_proportion_greater_than_1),
gene_meets_criterion_wP = wP >= threshold_proportion_greater_than_1 & aP <= (1 - threshold_proportion_greater_than_1)
) %>%
filter((gene_meets_criterion_aP == TRUE & gene_meets_criterion_wP == FALSE) || (gene_meets_criterion_aP == FALSE & gene_meets_criterion_wP == TRUE))
## Joining with `by = join_by(specimen_id)`
## `summarise()` has grouped output by 'dataset', 'versioned_ensembl_gene_id'. You
## can override using the `.groups` argument.
#' Create a shortlist of genes (tpm >= 1) in at least 30% of the specimens.
tpm_shortlist <- rnaseq_tpmcountData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
pivot_longer(!versioned_ensembl_gene_id, values_to = "tpm", names_to = "specimen_id") %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(subject_specimen) %>%
group_by(versioned_ensembl_gene_id) %>%
#group_by(versioned_ensembl_gene_id, infancy_vac) %>%
summarise(proportion = mean(tpm >= 5)) %>%
filter(proportion >= 0.3)
## Joining with `by = join_by(specimen_id)`
## Before batch correction
rnaseq_tpmcountData_v2 <- rnaseq_tpmcountData %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% gene_90_38_shortlist$versioned_ensembl_gene_id) %>%
filter(!versioned_ensembl_gene_id %in% tpm_sum_infancy_subgroup$versioned_ensembl_gene_id) %>%
filter(versioned_ensembl_gene_id %in% tpm_shortlist$versioned_ensembl_gene_id) %>%
column_to_rownames("versioned_ensembl_gene_id")
mad_2020 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2020_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2021 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2021_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2022 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2022_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_2023 <- mad_calculations(rnaseq_countData_v2, subject_specimen, c("2023_dataset"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mad_shotlisted_genes <- intersect(intersect(mad_2020$gene_id, mad_2021$gene_id), mad_2022$gene_id)
## mad_2023$gene_id genes were not included
rnaseq_tpmcountData_v3 <- rnaseq_tpmcountData_v2 %>%
rownames_to_column("versioned_ensembl_gene_id") %>%
filter(versioned_ensembl_gene_id %in% mad_shotlisted_genes) %>%
column_to_rownames("versioned_ensembl_gene_id")
pvca_analysis_rnaseq(rnaseq_tpmcountData_v3, subject_specimen, batch.factors, plot_title = "RNASeq tpm: Raw data")
## Cluster size 6603 broken into 6511 92
## Cluster size 6511 broken into 6202 309
## Cluster size 6202 broken into 1166 5036
## Done cluster 1166
## Cluster size 5036 broken into 3436 1600
## Cluster size 3436 broken into 1647 1789
## Cluster size 1647 broken into 781 866
## Done cluster 781
## Done cluster 866
## Done cluster 1647
## Cluster size 1789 broken into 610 1179
## Done cluster 610
## Done cluster 1179
## Done cluster 1789
## Done cluster 3436
## Cluster size 1600 broken into 1044 556
## Done cluster 1044
## Done cluster 556
## Done cluster 1600
## Done cluster 5036
## Done cluster 6202
## Done cluster 309
## Done cluster 6511
## Done cluster 92
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
batch_labels = as.data.frame(colnames(rnaseq_tpmcountData_v3)) %>%
rename(specimen_id = starts_with("colnames")) %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
left_join(rnaseq_metaData) %>%
dplyr::select(dataset)
## Joining with `by = join_by(specimen_id)`
rnaseq_tpmbatchCorrected = sva::ComBat_seq(as.matrix(rnaseq_tpmcountData_v3), batch = batch_labels$dataset)
## Found 4 batches
## Using null model in ComBat-seq.
## Adjusting for 0 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pvca_analysis_rnaseq(rnaseq_tpmbatchCorrected, subject_specimen, batch.factors, plot_title = "RNASeq tpm: Batch correction")
## Cluster size 6603 broken into 6503 100
## Cluster size 6503 broken into 6103 400
## Cluster size 6103 broken into 1176 4927
## Done cluster 1176
## Cluster size 4927 broken into 1747 3180
## Cluster size 1747 broken into 1095 652
## Done cluster 1095
## Done cluster 652
## Done cluster 1747
## Cluster size 3180 broken into 1762 1418
## Cluster size 1762 broken into 836 926
## Done cluster 836
## Done cluster 926
## Done cluster 1762
## Done cluster 1418
## Done cluster 3180
## Done cluster 4927
## Done cluster 6103
## Done cluster 400
## Done cluster 6503
## Done cluster 100
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`Joining with `by = join_by(specimen_id)`
rnaseq_normalised_data_tpm = list(
raw_data = as.matrix(rnaseq_tpmcountData_v3),
batchCorrected_data = rnaseq_tpmbatchCorrected
)
rnaseq_normalised_data = list(
raw_count = rnaseq_normalised_data_rawcount,
tpm = rnaseq_normalised_data_tpm
)
master_processed_data <- list(
subject_specimen = subject_specimen,
plasma_ab_titer = abtiter_processed_data,
plasma_cytokine_concentrations_by_olink = olink_processed_data,
plasma_cytokine_concentrations_by_legendplex = legendplex_processed_data,
pbmc_cell_frequency = cell_frequency_processed_data,
pbmc_gene_expression = rnaseq_normalised_data,
t_cell_polarization= tcell_polarization_processed_data,
t_cell_activation = tcell_activation_processed_data
)
#sapply(master_normalized_data$subject_specimen, dim)
sapply(master_processed_data$abtiter_wide, dim)
## list()
sapply(master_processed_data$plasma_cytokine_concentrations_by_olink, dim)
## raw_data normalized_data batchCorrected_data
## [1,] 45 45 45
## [2,] 490 490 490
sapply(master_processed_data$plasma_cytokine_concentrations_by_legendplex, dim)
## $raw_data
## [1] 14 451
##
## $normalized_data
## [1] 14 451
##
## $batchCorrected_data
## NULL
sapply(master_processed_data$pbmc_cell_frequency, dim)
## raw_data normalized_data batchCorrected_data
## [1,] 39 39 39
## [2,] 546 546 546
sapply(master_processed_data$pbmc_gene_expression, dim)
## $raw_count
## NULL
##
## $tpm
## NULL
sapply(master_processed_data$t_cell_polarization, dim)
## $raw_data
## [1] 6 261
##
## $normalized_data
## NULL
##
## $batchCorrected_data
## NULL
sapply(master_processed_data$t_cell_activation, dim)
## $raw_data
## [1] 3 299
##
## $normalized_data
## NULL
##
## $batchCorrected_data
## NULL
saveRDS(master_processed_data, file = paste0(dir_RDS_objects, "master_processed_data_v20240825.RDS"))
#master_processed_data = readRDS(file = paste0(dir_RDS_objects, "master_allData_batchCorrected_v20240825.RDS"))
# Recursive function to save data frames (and lists): master_allData_batchCorrected_TSV
dir_rds_objects = dir_RDS_objects
save_dataframes_to_tsv(master_processed_data)
## [1] "--->subject_specimen"
## [1] "--->plasma_ab_titer"
## [1] "plasma_ab_titer--->plasma_ab_titer_raw_data"
## [1] "plasma_ab_titer--->plasma_ab_titer_normalized_data"
## [1] "plasma_ab_titer--->plasma_ab_titer_batchCorrected_data"
## [1] "--->plasma_cytokine_concentrations_by_olink"
## [1] "plasma_cytokine_concentrations_by_olink--->plasma_cytokine_concentrations_by_olink_raw_data"
## [1] "plasma_cytokine_concentrations_by_olink--->plasma_cytokine_concentrations_by_olink_normalized_data"
## [1] "plasma_cytokine_concentrations_by_olink--->plasma_cytokine_concentrations_by_olink_batchCorrected_data"
## [1] "--->plasma_cytokine_concentrations_by_legendplex"
## [1] "plasma_cytokine_concentrations_by_legendplex--->plasma_cytokine_concentrations_by_legendplex_raw_data"
## [1] "plasma_cytokine_concentrations_by_legendplex--->plasma_cytokine_concentrations_by_legendplex_normalized_data"
## [1] "plasma_cytokine_concentrations_by_legendplex--->plasma_cytokine_concentrations_by_legendplex_batchCorrected_data"
## [1] "--->pbmc_cell_frequency"
## [1] "pbmc_cell_frequency--->pbmc_cell_frequency_raw_data"
## [1] "pbmc_cell_frequency--->pbmc_cell_frequency_normalized_data"
## [1] "pbmc_cell_frequency--->pbmc_cell_frequency_batchCorrected_data"
## [1] "--->pbmc_gene_expression"
## [1] "pbmc_gene_expression--->pbmc_gene_expression_raw_count"
## [1] "pbmc_gene_expression_raw_count--->pbmc_gene_expression_raw_count_raw_data"
## [1] "pbmc_gene_expression_raw_count--->pbmc_gene_expression_raw_count_batchCorrected_data"
## [1] "pbmc_gene_expression--->pbmc_gene_expression_tpm"
## [1] "pbmc_gene_expression_tpm--->pbmc_gene_expression_tpm_raw_data"
## [1] "pbmc_gene_expression_tpm--->pbmc_gene_expression_tpm_batchCorrected_data"
## [1] "--->t_cell_polarization"
## [1] "t_cell_polarization--->t_cell_polarization_raw_data"
## [1] "t_cell_polarization--->t_cell_polarization_normalized_data"
## [1] "t_cell_polarization--->t_cell_polarization_batchCorrected_data"
## [1] "--->t_cell_activation"
## [1] "t_cell_activation--->t_cell_activation_raw_data"
## [1] "t_cell_activation--->t_cell_activation_normalized_data"
## [1] "t_cell_activation--->t_cell_activation_batchCorrected_data"
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.2 (2022-10-31)
## os Ubuntu 22.04.3 LTS
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Etc/UTC
## date 2024-08-29
## pandoc 2.19.2 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [1] RSPM (R 4.2.0)
## ade4 * 1.7-22 2023-02-06 [1] RSPM (R 4.2.0)
## affy 1.76.0 2022-11-01 [1] Bioconductor
## affyio 1.68.0 2022-11-01 [1] Bioconductor
## annotate 1.76.0 2022-11-01 [1] Bioconductor
## AnnotationDbi 1.60.2 2023-03-10 [1] Bioconductor
## assertthat 0.2.1 2019-03-21 [1] RSPM (R 4.2.0)
## backports 1.4.1 2021-12-13 [1] RSPM (R 4.2.0)
## base64enc 0.1-3 2015-07-28 [1] RSPM (R 4.2.0)
## basilisk 1.10.2 2022-11-08 [1] Bioconductor
## basilisk.utils 1.10.0 2022-11-01 [1] Bioconductor
## BatchQC * 1.26.0 2022-11-01 [1] Bioconductor
## Biobase * 2.58.0 2022-11-01 [1] Bioconductor
## BiocGenerics * 0.44.0 2022-11-01 [1] Bioconductor
## BiocManager * 1.30.20 2023-02-24 [1] RSPM (R 4.2.0)
## BiocParallel * 1.32.6 2023-03-17 [1] Bioconductor
## BiocStyle * 2.26.0 2022-11-01 [1] Bioconductor
## Biostrings 2.66.0 2022-11-01 [1] Bioconductor
## bit 4.0.5 2022-11-15 [1] RSPM (R 4.2.0)
## bit64 4.0.5 2020-08-30 [1] RSPM (R 4.2.0)
## bitops 1.0-7 2021-04-24 [1] RSPM (R 4.2.0)
## blob 1.2.3 2022-04-10 [1] RSPM (R 4.2.0)
## bookdown 0.33 2023-03-06 [1] RSPM (R 4.2.0)
## boot 1.3-28.1 2022-11-22 [1] RSPM (R 4.2.0)
## broom 1.0.4 2023-03-11 [1] RSPM (R 4.2.0)
## bslib 0.4.2 2022-12-16 [1] RSPM (R 4.2.0)
## ca 0.71.1 2020-01-24 [1] RSPM (R 4.2.0)
## cachem 1.0.7 2023-02-24 [1] RSPM (R 4.2.0)
## callr 3.7.3 2022-11-02 [1] RSPM (R 4.2.0)
## car 3.1-1 2022-10-19 [1] RSPM (R 4.2.0)
## carData 3.0-5 2022-01-06 [1] RSPM (R 4.2.0)
## caTools 1.18.2 2021-03-28 [1] RSPM (R 4.2.0)
## checkmate 2.1.0 2022-04-21 [1] RSPM (R 4.2.0)
## cli 3.6.0 2023-01-09 [1] RSPM (R 4.2.0)
## cluster 2.1.4 2022-08-22 [3] CRAN (R 4.2.2)
## coda 0.19-4 2020-09-30 [1] RSPM (R 4.2.0)
## codetools 0.2-18 2020-11-04 [3] CRAN (R 4.2.2)
## colorspace 2.1-0 2023-01-23 [1] RSPM (R 4.2.0)
## corpcor * 1.6.10 2021-09-16 [1] RSPM (R 4.2.0)
## corrplot * 0.92 2021-11-18 [1] RSPM (R 4.2.0)
## cowplot 1.1.1 2020-12-30 [1] RSPM (R 4.2.0)
## crayon 1.5.2 2022-09-29 [1] RSPM (R 4.2.0)
## data.table 1.14.8 2023-02-17 [1] RSPM (R 4.2.0)
## DBI 1.1.3 2022-06-18 [1] RSPM (R 4.2.0)
## DelayedArray 0.24.0 2022-11-01 [1] Bioconductor
## dendextend 1.16.0 2022-07-04 [1] RSPM (R 4.2.0)
## devtools * 2.4.5 2022-10-11 [1] RSPM (R 4.2.0)
## digest 0.6.31 2022-12-11 [1] RSPM (R 4.2.0)
## dir.expiry 1.6.0 2022-11-01 [1] Bioconductor
## dplyr * 1.1.0 2023-01-29 [1] RSPM (R 4.2.0)
## edgeR 3.40.2 2023-01-19 [1] Bioconductor
## ellipsis 0.3.2 2021-04-29 [1] RSPM (R 4.2.0)
## evaluate 0.20 2023-01-17 [1] RSPM (R 4.2.0)
## fansi 1.0.4 2023-01-22 [1] RSPM (R 4.2.0)
## farver 2.1.1 2022-07-06 [1] RSPM (R 4.2.0)
## fastmap 1.1.1 2023-02-24 [1] RSPM (R 4.2.0)
## filelock 1.0.2 2018-10-05 [1] RSPM (R 4.2.0)
## forcats * 1.0.0 2023-01-29 [1] RSPM (R 4.2.0)
## foreach 1.5.2 2022-02-02 [1] RSPM (R 4.2.0)
## foreign 0.8-83 2022-09-28 [3] CRAN (R 4.2.2)
## Formula 1.2-5 2023-02-24 [1] RSPM (R 4.2.0)
## fs 1.6.1 2023-02-06 [1] RSPM (R 4.2.0)
## genefilter * 1.80.3 2023-01-19 [1] Bioconductor
## generics 0.1.3 2022-07-05 [1] RSPM (R 4.2.0)
## GenomeInfoDb 1.34.9 2023-02-02 [1] Bioconductor
## GenomeInfoDbData 1.2.9 2023-08-09 [1] Bioconductor
## GenomicRanges 1.50.2 2022-12-16 [1] Bioconductor
## ggplot2 * 3.4.1 2023-02-10 [1] RSPM (R 4.2.0)
## ggpubr * 0.6.0 2023-02-10 [1] RSPM (R 4.2.0)
## ggrepel 0.9.3 2023-02-03 [1] RSPM (R 4.2.0)
## ggsignif 0.6.4 2022-10-13 [1] RSPM (R 4.2.0)
## ggvis 0.4.8 2023-03-08 [1] RSPM (R 4.2.0)
## glmnet * 4.1-6 2022-11-27 [1] RSPM (R 4.2.0)
## glue 1.6.2 2022-02-24 [1] RSPM (R 4.2.0)
## gplots 3.1.3 2022-04-25 [1] RSPM (R 4.2.0)
## graph 1.76.0 2022-11-01 [1] Bioconductor
## graphite 1.44.0 2022-11-01 [1] Bioconductor
## gridExtra 2.3 2017-09-09 [1] RSPM (R 4.2.0)
## GSEABase 1.60.0 2022-11-01 [1] Bioconductor
## gtable 0.3.1 2022-09-01 [1] RSPM (R 4.2.0)
## gtools 3.9.4 2022-11-27 [1] RSPM (R 4.2.0)
## HDF5Array 1.26.0 2022-11-01 [1] Bioconductor
## heatmaply 1.4.2 2023-01-07 [1] RSPM (R 4.2.0)
## hexbin 1.28.2 2021-01-08 [1] RSPM (R 4.2.0)
## highr 0.10 2022-12-22 [1] RSPM (R 4.2.0)
## Hmisc * 5.0-1 2023-03-08 [1] RSPM (R 4.2.0)
## hms 1.1.2 2022-08-19 [1] RSPM (R 4.2.0)
## htmlTable 2.4.1 2022-07-07 [1] RSPM (R 4.2.0)
## htmltools 0.5.4 2022-12-07 [1] RSPM (R 4.2.0)
## htmlwidgets 1.6.1 2023-01-07 [1] RSPM (R 4.2.0)
## httpuv 1.6.9 2023-02-14 [1] RSPM (R 4.2.0)
## httr 1.4.5 2023-02-24 [1] RSPM (R 4.2.0)
## impute * 1.72.3 2023-01-19 [1] Bioconductor
## IRanges 2.32.0 2022-11-01 [1] Bioconductor
## iterators 1.0.14 2022-02-05 [1] RSPM (R 4.2.0)
## jquerylib 0.1.4 2021-04-26 [1] RSPM (R 4.2.0)
## jsonlite 1.8.4 2022-12-06 [1] RSPM (R 4.2.0)
## KEGGREST 1.38.0 2022-11-01 [1] Bioconductor
## KernSmooth 2.23-20 2021-05-03 [3] CRAN (R 4.2.2)
## knitr 1.42 2023-01-25 [1] RSPM (R 4.2.0)
## labeling 0.4.2 2020-10-20 [1] RSPM (R 4.2.0)
## later 1.3.0 2021-08-18 [1] RSPM (R 4.2.0)
## lattice 0.20-45 2021-09-22 [3] CRAN (R 4.2.2)
## lazyeval 0.2.2 2019-03-15 [1] RSPM (R 4.2.0)
## lifecycle 1.0.3 2022-10-07 [1] RSPM (R 4.2.0)
## limma * 3.54.2 2023-02-28 [1] Bioconductor
## lme4 * 1.1-31 2022-11-01 [1] RSPM (R 4.2.0)
## locfit 1.5-9.7 2023-01-02 [1] RSPM (R 4.2.0)
## lubridate * 1.9.2 2023-02-10 [1] RSPM (R 4.2.0)
## made4 1.72.0 2022-11-01 [1] Bioconductor
## magrittr 2.0.3 2022-03-30 [1] RSPM (R 4.2.0)
## MASS 7.3-58.1 2022-08-03 [3] CRAN (R 4.2.2)
## Matrix * 1.5-1 2022-09-13 [3] CRAN (R 4.2.2)
## MatrixGenerics 1.10.0 2022-11-01 [1] Bioconductor
## MatrixModels 0.5-1 2022-09-11 [1] RSPM (R 4.2.0)
## matrixStats 0.63.0 2022-11-18 [1] RSPM (R 4.2.0)
## mcmc 0.9-7 2020-03-21 [1] RSPM (R 4.2.0)
## MCMCpack 1.6-3 2022-04-13 [1] RSPM (R 4.2.0)
## memoise 2.0.1 2021-11-26 [1] RSPM (R 4.2.0)
## mgcv * 1.8-41 2022-10-21 [3] CRAN (R 4.2.2)
## mice * 3.15.0 2022-11-19 [1] RSPM (R 4.2.0)
## mime 0.12 2021-09-28 [1] RSPM (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] RSPM (R 4.2.0)
## minqa 1.2.5 2022-10-19 [1] RSPM (R 4.2.0)
## mnormt 2.1.1 2022-09-26 [1] RSPM (R 4.2.0)
## MOFA2 * 1.8.0 2022-11-01 [1] Bioconductor
## mogsa * 1.32.0 2022-11-01 [1] Bioconductor
## moments 0.14.1 2022-05-02 [1] RSPM (R 4.2.0)
## munsell 0.5.0 2018-06-12 [1] RSPM (R 4.2.0)
## nlme * 3.1-160 2022-10-10 [3] CRAN (R 4.2.2)
## nloptr 2.0.3 2022-05-26 [1] RSPM (R 4.2.0)
## nnet 7.3-18 2022-09-28 [3] CRAN (R 4.2.2)
## omicade4 * 1.38.0 2022-11-01 [1] Bioconductor
## pacman * 0.5.1 2019-03-11 [1] RSPM (R 4.2.0)
## pander 0.6.5 2022-03-18 [1] RSPM (R 4.2.0)
## pheatmap 1.0.12 2019-01-04 [1] RSPM (R 4.2.0)
## pillar 1.8.1 2022-08-19 [1] RSPM (R 4.2.0)
## pkgbuild 1.4.0 2022-11-27 [1] RSPM (R 4.2.0)
## pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.2.0)
## pkgload 1.3.2 2022-11-16 [1] RSPM (R 4.2.0)
## plotly 4.10.1 2022-11-07 [1] RSPM (R 4.2.0)
## plyr 1.8.8 2022-11-11 [1] RSPM (R 4.2.0)
## png 0.1-8 2022-11-29 [1] RSPM (R 4.2.0)
## preprocessCore 1.60.2 2023-01-19 [1] Bioconductor
## prettyunits 1.1.1 2020-01-24 [1] RSPM (R 4.2.0)
## processx 3.8.0 2022-10-26 [1] RSPM (R 4.2.0)
## profvis 0.3.7 2020-11-02 [1] RSPM (R 4.2.0)
## promises 1.2.0.1 2021-02-11 [1] RSPM (R 4.2.0)
## ps 1.7.2 2022-10-26 [1] RSPM (R 4.2.0)
## psych * 2.2.9 2022-09-29 [1] RSPM (R 4.2.0)
## purrr * 1.0.1 2023-01-10 [1] RSPM (R 4.2.0)
## quantreg 5.94 2022-07-20 [1] RSPM (R 4.2.0)
## R6 2.5.1 2021-08-19 [1] RSPM (R 4.2.0)
## rappdirs 0.3.3 2021-01-31 [1] RSPM (R 4.2.0)
## RColorBrewer 1.1-3 2022-04-03 [1] RSPM (R 4.2.0)
## Rcpp 1.0.10 2023-01-22 [1] RSPM (R 4.2.0)
## RCurl 1.98-1.10 2023-01-27 [1] RSPM (R 4.2.0)
## readr * 2.1.4 2023-02-10 [1] RSPM (R 4.2.0)
## registry 0.5-1 2019-03-05 [1] RSPM (R 4.2.0)
## remotes 2.4.2 2021-11-30 [1] RSPM (R 4.2.0)
## reshape2 1.4.4 2020-04-09 [1] RSPM (R 4.2.0)
## reticulate 1.28 2023-01-27 [1] RSPM (R 4.2.0)
## rhdf5 2.42.1 2023-04-07 [1] Bioconductor
## rhdf5filters 1.10.1 2023-03-24 [1] Bioconductor
## Rhdf5lib 1.20.0 2022-11-01 [1] Bioconductor
## rlang 1.0.6 2022-09-24 [1] RSPM (R 4.2.0)
## rmarkdown 2.20 2023-01-19 [1] RSPM (R 4.2.0)
## rpart 4.1.19 2022-10-21 [1] RSPM (R 4.2.0)
## RSpectra * 0.16-1 2022-04-24 [1] RSPM (R 4.2.0)
## RSQLite 2.3.0 2023-02-17 [1] RSPM (R 4.2.0)
## rstatix 0.7.2 2023-02-01 [1] RSPM (R 4.2.0)
## rstudioapi 0.14 2022-08-22 [1] RSPM (R 4.2.0)
## Rtsne 0.16 2022-04-17 [1] RSPM (R 4.2.0)
## S4Vectors 0.36.2 2023-02-26 [1] Bioconductor
## sass 0.4.5 2023-01-24 [1] RSPM (R 4.2.0)
## scales 1.2.1 2022-08-20 [1] RSPM (R 4.2.0)
## scatterplot3d 0.3-42 2022-09-08 [1] RSPM (R 4.2.0)
## seriation 1.4.2 2023-03-08 [1] RSPM (R 4.2.0)
## sessioninfo 1.2.2 2021-12-06 [1] RSPM (R 4.2.0)
## shape 1.4.6 2021-05-19 [1] RSPM (R 4.2.0)
## shiny 1.7.4 2022-12-15 [1] RSPM (R 4.2.0)
## SparseM 1.81 2021-02-18 [1] RSPM (R 4.2.0)
## stringi 1.7.12 2023-01-11 [1] RSPM (R 4.2.0)
## stringr * 1.5.0 2022-12-02 [1] RSPM (R 4.2.0)
## SummarizedExperiment 1.28.0 2022-11-01 [1] Bioconductor
## survival 3.4-0 2022-08-09 [3] CRAN (R 4.2.2)
## sva * 3.46.0 2022-11-01 [1] Bioconductor
## svd 0.5.3 2023-01-15 [1] RSPM (R 4.2.0)
## tibble * 3.2.0 2023-03-08 [1] RSPM (R 4.2.0)
## tidyr * 1.3.0 2023-01-24 [1] RSPM (R 4.2.0)
## tidyselect 1.2.0 2022-10-10 [1] RSPM (R 4.2.0)
## tidyverse * 2.0.0 2023-02-22 [1] RSPM (R 4.2.0)
## timechange 0.2.0 2023-01-11 [1] RSPM (R 4.2.0)
## TSP 1.2-3 2023-03-08 [1] RSPM (R 4.2.0)
## tzdb 0.3.0 2022-03-28 [1] RSPM (R 4.2.0)
## urlchecker 1.0.1 2021-11-30 [1] RSPM (R 4.2.0)
## usethis * 2.1.6 2022-05-25 [1] RSPM (R 4.2.0)
## utf8 1.2.3 2023-01-31 [1] RSPM (R 4.2.0)
## uwot 0.1.14 2022-08-22 [1] RSPM (R 4.2.0)
## vctrs 0.5.2 2023-01-23 [1] RSPM (R 4.2.0)
## viridis 0.6.2 2021-10-13 [1] RSPM (R 4.2.0)
## viridisLite 0.4.1 2022-08-22 [1] RSPM (R 4.2.0)
## vroom 1.6.1 2023-01-22 [1] RSPM (R 4.2.0)
## vsn * 3.66.0 2022-11-01 [1] Bioconductor
## webshot 0.5.4 2022-09-26 [1] RSPM (R 4.2.0)
## withr 2.5.0 2022-03-03 [1] RSPM (R 4.2.0)
## xfun 0.37 2023-01-31 [1] RSPM (R 4.2.0)
## XML 3.99-0.13 2022-12-04 [1] RSPM (R 4.2.0)
## xtable 1.8-4 2019-04-21 [1] RSPM (R 4.2.0)
## XVector 0.38.0 2022-11-01 [1] Bioconductor
## yaml 2.3.7 2023-01-23 [1] RSPM (R 4.2.0)
## zlibbioc 1.44.0 2022-11-01 [1] Bioconductor
##
## [1] /home/pshinde/R/x86_64-pc-linux-gnu-library/4.2
## [2] /usr/local/lib/R/site-library
## [3] /usr/local/lib/R/library
##
## ──────────────────────────────────────────────────────────────────────────────