library(readr)
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(tibble)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggupset)
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(patchwork)
# library(GenomicRanges)
print(getwd())
## [1] "/Users/rnili/Desktop/Challegne_2_data"
base_path <- "/Users/rnili/Desktop/Challegne_2_data"
list.files(path=paste0(base_path,"/2020_LD"))
## [1] "2020LD_pbmc_cell_frequency.tsv"
## [2] "2020LD_pbmc_gene_expression.tsv"
## [3] "2020LD_plasma_ab_titer.tsv"
## [4] "2020LD_plasma_protein_expression.tsv"
## [5] "2020LD_specimen.tsv"
## [6] "2020LD_subject.tsv"
list.files(path=paste0(base_path))
## [1] "2020_LD"
## [2] "2021_LD"
## [3] "2022_BD"
## [4] "Ab_titer.pdf"
## [5] "antigen.pdf"
## [6] "Cell_frequency.pdf"
## [7] "cell_type_name.pdf"
## [8] "clinical_data.csv"
## [9] "dataExploration.log"
## [10] "dataExploration.pdf"
## [11] "dataExploration.R"
## [12] "dataExploration.spin.R"
## [13] "dataExploration.spin.Rmd"
## [14] "dataExploration.tex"
## [15] "dataExploration1.html"
## [16] "Demographic_Longitudinal_sampleMapping.pdf"
## [17] "freq_cell.csv"
## [18] "freq_cell0.csv"
## [19] "freq_cell1.csv"
## [20] "freq_cell2.csv"
## [21] "freq_cell3.csv"
## [22] "gene_id.pdf"
## [23] "mat_all.csv"
## [24] "Olink.pdf"
## [25] "RNASeq.pdf"
## [26] "subjects.pdf"
## [27] "uniprot.pdf"
folders = c("2020_LD", "2021_LD", "2022_BD")
info_binder <- function(path, folders, sufix){
df_total <- data.frame()
for (i in 1:length(folders)){
name = gsub('_', '', folders[i])
file_name <- sprintf("/%s/%s_%s.tsv", folders[i], name, sufix)
df <- readr::read_tsv(paste0(base_path, file_name), show_col_types = FALSE)
df_total <- rbind(df_total, df)
}
return(df_total)
}
experiments_binder <- function(path, clinical_data, folders, sufix){
df_total <- data.frame()
for (i in 1:length(folders)){
name = gsub('_', '', folders[i])
file_name <- sprintf("/%s/%s_%s.tsv", folders[i], name, sufix)
df <- readr::read_tsv(paste0(base_path, file_name), show_col_types = FALSE)%>%
dplyr::left_join(., clinical_data)
df$year <- name
# print(header(df))
df_total <- rbind(df_total, df)
}
return(df_total)
}
demographic_plots <- function(clinical_data, count_data, plotTitle){
print(plotTitle)
print(paste0("Number of subjects: ", length(unique( count_data$subject_id))))
print(paste0("Number of specimen: ", length(unique( count_data$specimen_id))))
#
# print(colnames(count_data))
#
# print(count_data$dataset)
plot_data <- count_data %>%
distinct(., subject_id, biological_sex,race, ethnicity, infancy_vac, dataset) %>%
group_by(dataset)
print(plot_data)
# Distribution of subjects by sex
p1 <- plot_data %>%
ggplot(., aes(x=dataset, fill = biological_sex)) +
geom_bar(position = "dodge") +
theme_minimal() +
ggtitle(plotTitle) +
# theme_minimal()
theme(axis.ticks.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))+
ggtitle(plotTitle)
# Distribution of subjects by ethnicity
p2 <- plot_data %>%
ggplot(., aes(x=dataset, fill = ethnicity)) +
geom_bar(position = "dodge") +
# theme_minimal()+
theme(axis.ticks.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))+
plot_layout(widths = 5)
# Distribution of subjects by race
p3 <- plot_data %>%
ggplot(., aes(x=dataset, fill = race)) +
geom_bar(position = "dodge") +
# theme_minimal()+
theme(axis.ticks.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))+
plot_layout(widths = 5)
# Distribution of subjects by infancy_vac
p4 <- plot_data %>%
ggplot(., aes(x=dataset, fill = infancy_vac)) +
geom_bar(position = "dodge") +
# theme_minimal() +
theme(axis.ticks.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))+
plot_layout(widths = 5)
plotDF <- clinical_data %>%
filter(specimen_id %in% count_data$specimen_id) %>%
add_column(flag = 1) %>%
dplyr::select(subject_id, Timepoint, flag) %>%
distinct() %>%
pivot_wider(names_from = Timepoint, values_from = flag) %>%
dplyr::select(-subject_id) %>%
group_by_all() %>%
summarize(n = n()) %>%
arrange(desc(n)) %>%
rowid_to_column() %>%
pivot_longer(cols = -c(rowid, n), names_to = "Timepoint") %>%
filter(!is.na(value))
print(plotDF)
p5 <- ggplot(data = plotDF,
mapping = aes(x = as.numeric(Timepoint), y = rowid)) +
geom_point(mapping = aes(size = n)) +
geom_line(mapping = aes(group = rowid)) +
scale_y_continuous(breaks = 1:max(plotDF$rowid),
labels = distinct(dplyr::select(plotDF,rowid, n))$n) +
labs(y = "Number of Subjects", x = "Timepoint") +
ggtitle(plotTitle) +
theme_bw()+
theme(axis.ticks.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))+plot_layout(widths = 5)
#plot as grid
knitr::opts_chunk$set(fig.width=14, fig.height=6)
hlay1 <- rbind(c(1,2),
c(3,3))
hlay2 <- rbind(c(1,2))
# options(repr.plot.width =12, repr.plot.height =4)
grid.arrange(p5, p4, ncol = 2, layout_matrix=hlay2)
# options(repr.plot.width =12, repr.plot.height =6)
grid.arrange(p1, p2, p3, ncol = 2, layout_matrix=hlay1)
}
upSetExpList <- function(years, olinkDF, rnaDF, cellDF, abtiterDF){
exp_list = list()
for (i in 1:length(years)){
olinkln <- paste('olink',years[i],sep='')
exp_list[[olinkln]] <- unlist(unique(olinkDF[grep(years[i], olinkDF$dataset), 'subject_id']), use.names = FALSE)
rnaln <- paste('rnaseq',years[i],sep='')
exp_list[[rnaln]] <- unlist(unique(rnaDF[grep(years[i], rnaDF$dataset), 'subject_id']), use.names = FALSE)
cellln <- paste('cell_freq',years[i],sep='')
exp_list[[cellln]] <- unlist(unique(cellDF[grep(years[i], cellDF$dataset), 'subject_id']), use.names = FALSE)
abtiterln <- paste('abtiter',years[i],sep='')
exp_list[[abtiterln]] <- unlist(unique(abtiterDF[grep(years[i], abtiterDF$dataset), 'subject_id']), use.names = FALSE)
}
return(exp_list)
}
upSetList <- function(years, df, target_col){
exp_list = list()
for (i in 1:length(years)){
uniprotln <- paste(target_col,years[i],sep='')
exp_list[[uniprotln]] <- unlist(unique(df[grep(years[i], df$dataset), target_col]), use.names = FALSE)
}
return(exp_list)
}
upset_plot1 <- function(mat){
plot <- UpSet(mat,
comb_col = c("red", "blue", "brown", "black")[comb_degree(mat)],
top_annotation = upset_top_annotation(mat, add_numbers = TRUE, gp = gpar(col = comb_degree(mat))),
right_annotation = upset_right_annotation(mat, add_numbers = TRUE)
)
}
## Read subject data
subject <- info_binder(base_path, folders, "subject")
print(dim(subject))
## [1] 118 8
## Read specimen data
specimen <- info_binder(base_path, folders, "specimen")
print(dim(specimen))
## [1] 939 6
## Timepoint: days passed from the boost date for taking the specimen relevantto the visit number
## Join subject and specimen data
clinical_data <- dplyr::inner_join(subject, specimen, multiple = "all")
## Joining with `by = join_by(subject_id)`
clinical_data$Timepoint <- as.factor(clinical_data$planned_day_relative_to_boost)
## Read live_cell_percentages datasets
cell_freq <- experiments_binder(base_path, clinical_data, folders, "pbmc_cell_frequency")
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
print(dim(cell_freq))
## [1] 13895 17
## Read rnaseq datasets
rnaseq <- experiments_binder(base_path, clinical_data, folders, "pbmc_gene_expression")
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
print(dim(rnaseq))
## [1] 29559114 18
## Read olink datasets
olink <- experiments_binder(base_path, clinical_data, folders, "plasma_protein_expression")
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
# print(dim(olink))
# print(colnames(olink))
# print(as.factor(olink$dataset))
## Read Ab titer datasets
abtiter <- experiments_binder(base_path, clinical_data, folders, "plasma_ab_titer")
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
## Joining with `by = join_by(specimen_id)`
print(dim(abtiter))
## [1] 41705 22
colnames(abtiter)
## [1] "specimen_id" "isotype"
## [3] "is_antigen_specific" "antigen"
## [5] "MFI" "MFI_normalised"
## [7] "unit" "lower_limit_of_detection"
## [9] "subject_id" "infancy_vac"
## [11] "biological_sex" "ethnicity"
## [13] "race" "year_of_birth"
## [15] "date_of_boost" "dataset"
## [17] "actual_day_relative_to_boost" "planned_day_relative_to_boost"
## [19] "specimen_type" "visit"
## [21] "Timepoint" "year"
abtiter$antigen = toupper(abtiter$antigen)
# Exploratory plot 1
# e_plot1 <- demographic_plots(clinical_data, rnaseq, "RNASeq data")
# plotDF <- clinical_data %>%
# filter(specimen_id %in% cell_freq$specimen_id) %>%
# add_column(flag = 1) %>%
# dplyr::select(subject_id, Timepoint, flag) %>%
# distinct() %>%
# pivot_wider(names_from = Timepoint, values_from = flag) %>%
# dplyr::select(-subject_id) %>%
# group_by_all() %>%
# summarize(n = n()) %>%
# arrange(desc(n)) %>%
# rowid_to_column() %>%
# pivot_longer(cols = -c(rowid, n), names_to = "Timepoint") %>%
# filter(!is.na(value))
# print(tail(data.frame(plotDF), 100))
# write.csv(clinical_data, paste0(base_path, '/clinical_data.csv'), row.names =FALSE)
# write.csv(plotDF, paste0(base_path, '/freq_cell3.csv'), row.names =FALSE)
# print(dim(data.frame(plotDF)))
# pdf(paste0(base_path, "/RNASeq.pdf"), paper='USr', width=10, height=6)
# width = 30, height = 20 , units = 'in', res = 300 )
demographic_plots(clinical_data, rnaseq, "RNASeq data")
## [1] "RNASeq data"
## [1] "Number of subjects: 93"
## [1] "Number of specimen: 507"
## # A tibble: 93 × 6
## # Groups: dataset [3]
## subject_id biological_sex race ethnicity infan…¹ dataset
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 9 Male Asian Not Hispan… aP 2020_d…
## 2 13 Male White Not Hispan… aP 2020_d…
## 3 18 Female Unknown or Not Reported Hispanic o… aP 2020_d…
## 4 27 Female Asian Not Hispan… aP 2020_d…
## 5 29 Male White Hispanic o… aP 2020_d…
## 6 10 Female Asian Not Hispan… wP 2020_d…
## 7 15 Male Asian Not Hispan… wP 2020_d…
## 8 20 Female White Not Hispan… wP 2020_d…
## 9 21 Male White Not Hispan… wP 2020_d…
## 10 22 Female White Not Hispan… wP 2020_d…
## # … with 83 more rows, and abbreviated variable name ¹​infancy_vac
## `summarise()` has grouped output by '0', '1', '3', '7', '14', '-30'. You can
## override using the `.groups` argument.
## # A tibble: 12 × 4
## rowid n Timepoint value
## <int> <int> <chr> <dbl>
## 1 1 72 0 1
## 2 1 72 1 1
## 3 1 72 3 1
## 4 1 72 7 1
## 5 1 72 14 1
## 6 2 21 0 1
## 7 2 21 1 1
## 8 2 21 3 1
## 9 2 21 7 1
## 10 2 21 14 1
## 11 2 21 -30 1
## 12 2 21 -15 1


# dev.off()
# pdf(paste0(base_path, "/Olink.pdf"), paper='USr', width=10, height=6)
demographic_plots(clinical_data, olink, "OLINK data")
## [1] "OLINK data"
## [1] "Number of subjects: 74"
## [1] "Number of specimen: 326"
## # A tibble: 74 × 6
## # Groups: dataset [3]
## subject_id biological_sex race ethnicity infan…¹ dataset
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 21 Male White Not Hispan… wP 2020_d…
## 2 33 Male More Than One Race Hispanic o… wP 2020_d…
## 3 31 Female Asian Not Hispan… wP 2020_d…
## 4 26 Female Unknown or Not Reported Hispanic o… wP 2020_d…
## 5 47 Female White Not Hispan… aP 2020_d…
## 6 4 Male Asian Not Hispan… wP 2020_d…
## 7 11 Female Unknown or Not Reported Hispanic o… wP 2020_d…
## 8 29 Male White Hispanic o… aP 2020_d…
## 9 20 Female White Not Hispan… wP 2020_d…
## 10 6 Female White Not Hispan… wP 2020_d…
## # … with 64 more rows, and abbreviated variable name ¹​infancy_vac
## `summarise()` has grouped output by '0', '1', '3', '7', '14', '-30'. You can
## override using the `.groups` argument.
## # A tibble: 12 × 4
## rowid n Timepoint value
## <int> <int> <chr> <dbl>
## 1 1 54 0 1
## 2 1 54 1 1
## 3 1 54 3 1
## 4 1 54 7 1
## 5 1 54 14 1
## 6 2 16 0 1
## 7 2 16 -30 1
## 8 2 16 -15 1
## 9 3 3 0 1
## 10 3 3 -15 1
## 11 4 1 -30 1
## 12 4 1 -15 1


# dev.off()
# pdf(paste0(base_path, "/Cell_frequency.pdf"), paper='USr', width=10, height=6)
demographic_plots(clinical_data, cell_freq, "Cell frequencies data")
## [1] "Cell frequencies data"
## [1] "Number of subjects: 74"
## [1] "Number of specimen: 327"
## # A tibble: 74 × 6
## # Groups: dataset [3]
## subject_id biological_sex race ethnicity infan…¹ dataset
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 29 Male White Hispanic o… aP 2020_d…
## 2 36 Female White Hispanic o… aP 2020_d…
## 3 44 Female More Than One Race Hispanic o… aP 2020_d…
## 4 45 Female Asian Not Hispan… aP 2020_d…
## 5 46 Female Unknown or Not Reported Not Hispan… aP 2020_d…
## 6 47 Female White Not Hispan… aP 2020_d…
## 7 48 Female White Not Hispan… aP 2020_d…
## 8 49 Female White Not Hispan… aP 2020_d…
## 9 52 Male More Than One Race Not Hispan… aP 2020_d…
## 10 55 Female Asian Not Hispan… aP 2020_d…
## # … with 64 more rows, and abbreviated variable name ¹​infancy_vac
## `summarise()` has grouped output by '0', '1', '3', '7', '14', '-30'. You can
## override using the `.groups` argument.
## # A tibble: 10 × 4
## rowid n Timepoint value
## <int> <int> <chr> <dbl>
## 1 1 53 0 1
## 2 1 53 1 1
## 3 1 53 3 1
## 4 1 53 7 1
## 5 1 53 14 1
## 6 2 20 0 1
## 7 2 20 -30 1
## 8 2 20 -15 1
## 9 3 1 0 1
## 10 3 1 -15 1


# dev.off()
# pdf(paste0(base_path, "/Ab_titer.pdf"), paper='USr', width=10, height=6)
demographic_plots(clinical_data, abtiter, "Ab titer data")
## [1] "Ab titer data"
## [1] "Number of subjects: 111"
## [1] "Number of specimen: 685"
## # A tibble: 111 × 6
## # Groups: dataset [3]
## subject_id biological_sex race ethnicity infan…¹ dataset
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 Female White Not Hispan… wP 2020_d…
## 2 3 Female White Unknown wP 2020_d…
## 3 4 Male Asian Not Hispan… wP 2020_d…
## 4 5 Male Asian Not Hispan… wP 2020_d…
## 5 6 Female White Not Hispan… wP 2020_d…
## 6 7 Female More Than One Race Hispanic o… wP 2020_d…
## 7 9 Male Asian Not Hispan… aP 2020_d…
## 8 10 Female Asian Not Hispan… wP 2020_d…
## 9 11 Female Unknown or Not Reported Hispanic o… wP 2020_d…
## 10 12 Male Asian Not Hispan… wP 2020_d…
## # … with 101 more rows, and abbreviated variable name ¹​infancy_vac
## `summarise()` has grouped output by '0', '3', '7', '14', '30', '120', '464',
## '1', '423', '386', '511', '447', '448', '428', '415', '543', '402', '407',
## '396', '366', '-30'. You can override using the `.groups` argument.
## # A tibble: 122 × 4
## rowid n Timepoint value
## <int> <int> <chr> <dbl>
## 1 1 70 0 1
## 2 1 70 3 1
## 3 1 70 7 1
## 4 1 70 14 1
## 5 1 70 30 1
## 6 1 70 120 1
## 7 1 70 1 1
## 8 2 20 0 1
## 9 2 20 -30 1
## 10 2 20 -15 1
## # … with 112 more rows


# dev.off()
years = c('2020', '2021', '2022')
lt_all = upSetExpList(years, olink, rnaseq, cell_freq, abtiter)
# rnaseq
# lt_all
# print(lt_all)
# print(cbind(lt_all$olink2020, lt_all$olink2021))
# print(lt_all$olink2021)
knitr::opts_chunk$set(fig.width=6, fig.height=4.5)
mat_all <- make_comb_mat(lt_all)
# print(mat_all)
write.csv(mat_all, paste0(base_path, '/mat_all.csv'))
plot <- upset_plot1(mat_all)
# pdf(paste0(base_path, "/subjects.pdf"), paper='USr', width=10, height=6)
plot

# dev.off()
lt_protein <- upSetList(years, olink, 'protein_id')
# lt_protein
print(typeof(lt_protein$protein_id2021))
## [1] "character"
mat_protein = make_comb_mat(lt_protein)
plot <- upset_plot1(mat_protein)
# pdf(paste0(base_path, "/uniprot.pdf"), paper='USr', width=10, height=6)
plot

# dev.off()
lt_rnaseq <- upSetList(years, rnaseq, 'versioned_ensembl_gene_id')
mat_rnaseq = make_comb_mat(lt_rnaseq)
plot <- upset_plot1(mat_rnaseq)
# pdf(paste0(base_path, "/gene_id.pdf"), paper='USr', width=10, height=6)
plot

# dev.off()
print(colnames(cell_freq))
## [1] "specimen_id" "cell_type_name"
## [3] "percent_live_cell" "subject_id"
## [5] "infancy_vac" "biological_sex"
## [7] "ethnicity" "race"
## [9] "year_of_birth" "date_of_boost"
## [11] "dataset" "actual_day_relative_to_boost"
## [13] "planned_day_relative_to_boost" "specimen_type"
## [15] "visit" "Timepoint"
## [17] "year"
lt_cell_freq <- upSetList(years, cell_freq, 'cell_type_name')
mat_cell_freq = make_comb_mat(lt_cell_freq)
plot <- upset_plot1(mat_cell_freq)
# pdf(paste0(base_path, "/cell_type_name.pdf"), paper='USr', width=10, height=6)
plot

# dev.off()
print(colnames(abtiter))
## [1] "specimen_id" "isotype"
## [3] "is_antigen_specific" "antigen"
## [5] "MFI" "MFI_normalised"
## [7] "unit" "lower_limit_of_detection"
## [9] "subject_id" "infancy_vac"
## [11] "biological_sex" "ethnicity"
## [13] "race" "year_of_birth"
## [15] "date_of_boost" "dataset"
## [17] "actual_day_relative_to_boost" "planned_day_relative_to_boost"
## [19] "specimen_type" "visit"
## [21] "Timepoint" "year"
lt_abtiter <- upSetList(years, abtiter, 'antigen')
print(lt_abtiter)
## $antigen2020
## [1] "TOTAL" "PT" "PRN" "FHA" "ACT" "LOS" "FELD1"
## [8] "BETV1" "LOLP1" "MEASLES" "PTM" "FIM2/3" "TT" "DT"
## [15] "OVA" "PD1"
##
## $antigen2021
## [1] "PRN" "DT" "FHA" "FIM2/3" "TT" "PT" "OVA"
##
## $antigen2022
## [1] "DT" "FHA" "FIM2/3" "OVA" "PRN" "PT" "TT"
mat_abtiter = make_comb_mat(lt_abtiter)
plot <- upset_plot1(mat_abtiter)
# pdf(paste0(base_path, "/antigen.pdf"), paper='USr', width=10, height=6)
plot

# dev.off()
# ## Read subject data
# subject_2020 <- readr::read_tsv(paste0(base_path, "/2020_LD/2020LD_subject.tsv"), show_col_types = FALSE)
# # print(subject_2020[, 1:4])
# # subject_2020 <- read.delim(paste0(base_path, "/2020_LD/2020LD_subject.tsv"), sep="\t")
# subject_2021 <- readr::read_tsv(paste0(base_path, "/2021_LD/2021LD_subject.tsv"), show_col_types = FALSE)
# subject_2022 <- readr::read_tsv(paste0(base_path, "/2022_BD/2022BD_subject.tsv"), show_col_types = FALSE)
#
# subject <- rbind(subject_2020, subject_2021, subject_2022)
# print(dim(subject))
# # print(subject)
#
# ## Read specimen data
# specimen_2020 <- readr::read_tsv(paste0(base_path, "/2020_LD/2020LD_specimen.tsv"), show_col_types = FALSE)
# specimen_2021 <- readr::read_tsv(paste0(base_path, "/2021_LD/2021LD_specimen.tsv"), show_col_types = FALSE)
# specimen_2022 <- readr::read_tsv(paste0(base_path, "/2022_BD/2022BD_specimen.tsv"), show_col_types = FALSE)
#
# specimen = rbind(specimen_2020, specimen_2021, specimen_2022)
# print(specimen)
#
# ## Join subject and specimen data
# clinical_data <- dplyr::inner_join(subject, specimen, multiple = "all")
# print(clinical_data)
#
# # typeof(clinical_data$planned_day_relative_to_boost)
# clinical_data$Timepoint <- as.factor(clinical_data$planned_day_relative_to_boost)
# # typeof(clinical_data$Timepoint)
# # print(clinical_data$Timepoint)
#
# ## Read live_cell_percentages datasets
# cell_freq_2020 <- readr::read_tsv(paste0(base_path, "/2020_LD/2020LD_pbmc_cell_frequency.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
# cell_freq_2021 <- readr::read_tsv(paste0(base_path, "/2021_LD/2021LD_pbmc_cell_frequency.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
# cell_freq_2022 <- readr::read_tsv(paste0(base_path, "/2022_BD/2022BD_pbmc_cell_frequency.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
#
# cell_freq <- rbind(cell_freq_2020, cell_freq_2021, cell_freq_2022)
# print(dim(cell_freq))
#
# ## Read rnaseq datasets
## Read olink datasets
olink_2020 <- readr::read_tsv(paste0(base_path, "/2020_LD/2020LD_plasma_protein_expression.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
## Joining with `by = join_by(specimen_id)`
olink_2021 <- readr::read_tsv(paste0(base_path, "/2021_LD/2021LD_plasma_protein_expression.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
## Joining with `by = join_by(specimen_id)`
olink_2022 <- readr::read_tsv(paste0(base_path, "/2022_BD/2022BD_plasma_protein_expression.tsv"), show_col_types = FALSE) %>% dplyr::left_join(., clinical_data)
## Joining with `by = join_by(specimen_id)`
lt_protein <- list(
uniprot_2020 = unique(olink_2020$protein_id),
uniprot_2021 = unique(olink_2021$protein_id),
uniprot_2022 = unique(olink_2022$protein_id)
)
# print(typeof(lt_protein$uniprot_2021))
mat_protein = make_comb_mat(lt_protein)
plot <- upset_plot1(mat_protein)
plot
