Load Data
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cellDf1 <- read_tsv(here("./data/2021LD_pbmc_cell_frequency.tsv"), show_col_types = FALSE)%>%
select(specimen_id, cell_type_name, percent_live_cell) %>%
filter(cell_type_name %in% cells)
cellDf <- cellDf1%>%
pivot_wider(names_from = cell_type_name, values_from = percent_live_cell) %>%
column_to_rownames("specimen_id")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
rnaDf <- read_tsv(here("./data/2021LD_pbmc_gene_expression.tsv"), show_col_types = FALSE) %>%
select(specimen_id, versioned_ensembl_gene_id, tpm)
# filter(versioned_ensembl_gene_id %in% genes)
# tpm_shortlist <- rnaDf %>%
# mutate(specimen_id = as.integer(specimen_id)) %>%
# group_by(versioned_ensembl_gene_id) %>%
# summarise(proportion = mean(tpm >= 1)) %>%
# filter(proportion >= 0.8)
rnaDf <- rnaDf %>%
# filter(versioned_ensembl_gene_id %in% tpm_shortlistrnaDf $versioned_ensembl_gene_id) %>%
filter(versioned_ensembl_gene_id %in% genes) %>%
mutate(versioned_ensembl_gene_id=gsub("\\..*", "", versioned_ensembl_gene_id))
par(mar = c(4, 4, .1, .1))
histogram(rnaDf, "tpm")
dist_histogram(rnaDf, "tpm")
# removed outlier RNA
print("Remove Outliers")
## [1] "Remove Outliers"
outliers <- boxplot(rnaDf$tpm, plot=FALSE)$out
par(mar = c(4, 4, .1, .1))
histogram(rnaDf[-which(rnaDf$tpm %in% outliers),], "tpm")
dist_histogram(rnaDf[-which(rnaDf$tpm %in% outliers),], "tpm")
rnaDf <- rnaDf %>%
pivot_wider(names_from = versioned_ensembl_gene_id, values_from = tpm) %>%
column_to_rownames("specimen_id")




knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiterDf <- read_tsv(here("./data/2021LD_plasma_ab_titer.tsv"), show_col_types = FALSE)%>%
filter(!isotype %in% "IgE") %>%
mutate(isotype_antigen = paste0(isotype, "_", antigen)) %>%
select(specimen_id, isotype_antigen, MFI_normalised) %>%
filter(isotype_antigen %in% antigens) %>%
pivot_wider(names_from = isotype_antigen, values_from = MFI_normalised) %>%
column_to_rownames("specimen_id")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olinkDf <- read_tsv(here("./data/2021LD_plasma_cytokine_concentration.tsv"), show_col_types = FALSE) %>%
select(specimen_id, protein_id, protein_expression) %>%
filter(specimen_id %in% samples & protein_id %in% proteins) %>%
pivot_wider(names_from = protein_id, values_from = protein_expression) %>%
column_to_rownames("specimen_id")
dataList <- list()
dataList[["original"]] <- list("rnaseq"= t(rnaDf),
"abtiter"= t(abtiterDf),
"cytof"= t(cellDf),
"olink"= t(olinkDf)
)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
all_samples <- rnaDf %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
mofa_rnaseq <- rnaDf %>% t()
mofa_abtiter <- all_samples %>%
left_join(abtiterDf %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")%>%
t()
mofa_cytof <- all_samples %>%
left_join(cellDf %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")%>%
t()
mofa_olink <- all_samples %>%
left_join(olinkDf %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id") %>%
t()
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
dataList[["mofa"]] <- list(
"rnaseq"= mofa_rnaseq,
"abtiter"= mofa_abtiter,
"cytof"= mofa_cytof,
"olink"= mofa_olink)
dataList$mofa <- lapply(dataList$mofa, as.matrix)
MOFAobject <- create_mofa(dataList$mofa)
plot_data_overview(MOFAobject)

data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
model_opts$num_factors <- 15
train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject <- prepare_mofa(MOFAobject,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject <- run_mofa(MOFAobject, use_basilisk = TRUE)
plot_factor_cor(MOFAobject)

plot_variance_explained(MOFAobject, max_r2=15)

plot_variance_explained(MOFAobject, plot_total = T)[[2]]

na2median <- function(df) {
df <- data.frame(df)
df <- t(data.frame(t(df)) %>%
mutate(across(1:dim(df)[1], ~replace_na(., median(., na.rm=TRUE)))))
colnames(df) <- gsub("X", "", colnames(df))
return(as.matrix(df))
}
cellDf_2020 <- read_tsv(here("./data/2020LD_pbmc_cell_frequency.tsv"), show_col_types = FALSE)%>%
select(specimen_id, cell_type_name, percent_live_cell) %>%
filter(cell_type_name %in% cells) %>%
pivot_wider(names_from = cell_type_name, values_from = percent_live_cell) %>%
column_to_rownames("specimen_id") %>%
t()
fullcellDf <- t(cbind(cellDf_2020, t(cellDf)))
batch <- c(rep('2020', 165), rep('2021', 100))
# impCellDf <- na_mean((fullcellDf))
impCellDf <- na2median((fullcellDf))
cellDf_all <- ComBat(dat=t(impCellDf), batch=batch, par.prior=FALSE)
cellDf_pLonger <-data.frame(cellDf_all) %>%
pivot_longer(everything(), names_to = "specimen_id") %>%
mutate(specimen_id=gsub("X", "", specimen_id))
cellDf_2021 <- cellDf_pLonger %>% filter(specimen_id %in% rownames(cellDf))
print("Cell Freq before Batch Correction")
## [1] "Cell Freq before Batch Correction"
par(mar = c(4, 4, .1, .1))
histogram(cellDf1, "percent_live_cell")
dist_histogram(cellDf1, "percent_live_cell")
print("Cell Freq after Batch Correction")
## [1] "Cell Freq after Batch Correction"
par(mar = c(4, 4, .1, .1))
histogram(cellDf_2021, "value")
dist_histogram(cellDf_2021, "value")




knitr::opts_chunk$set(warning = FALSE, message = FALSE)
colnames(cellDf_all) <- gsub("X", "", colnames(cellDf_all))
cell_BC <- data.frame(t(cellDf_all[, rownames(cellDf)]))
cell_BC$specimen_id <- rownames(cell_BC)
cellDf_BC <- full_join(all_samples, cell_BC, by="specimen_id") %>%
column_to_rownames("specimen_id")%>%
t()
dataList[["batchCorrected"]] <- list(
"rnaseq"= mofa_rnaseq,
"abtiter"= mofa_abtiter,
"cytof"= cellDf_BC,
"olink"= mofa_olink)
dataList$batchCorrected <- lapply(dataList$batchCorrected, as.matrix)
MOFAobject <- create_mofa(dataList$batchCorrected)
plot_data_overview(MOFAobject)

data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
model_opts$num_factors <- 15
train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject <- prepare_mofa(MOFAobject,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject <- run_mofa(MOFAobject, use_basilisk = TRUE)
plot_factor_cor(MOFAobject)

plot_variance_explained(MOFAobject, max_r2=15)

plot_variance_explained(MOFAobject, plot_total = T)[[2]]
