Load Data
# ---- Cell frequency: load, reshape to features x specimens, summarise NAs ----
cellDf <- read_tsv(
  here("./data/2021LD_pbmc_cell_frequency.tsv"),
  show_col_types = FALSE
)
# Keep only the requested specimens and cell populations.
cellDf <- cellDf[cellDf$specimen_id %in% samples &
                   cellDf$cell_type_name %in% cells, ]
# Long -> wide: one row per specimen, one column per cell population.
cellDf <- pivot_wider(cellDf, names_from = "cell_type_name",
                      values_from = "percent_live_cell")
cellDf <- as.data.frame(cellDf)
rownames(cellDf) <- cellDf[["specimen_id"]]
# NOTE(review): this indexes metaDf rows by the specimen_id *values* rather
# than by a logical mask on the current specimens -- confirm that is intended.
subject_num <- nlevels(factor(metaDf[metaDf$specimen_id, "subject_id"]))
# Drop the id column and transpose so that rows are cell populations.
cellDf <- t(cellDf[, !(names(cellDf) %in% "specimen_id")])
# cellDf <- na.omit(cellDf)
# Remove populations whose variance across specimens is not positive.
cellDf <- cellDf[rowVars(cellDf, na.rm = TRUE) > 0, ]
# print(names(which(colSums(is.na(cellDf)) > 0)))
# Specimens (columns) with / without at least one missing value.
incompSamples <- colnames(cellDf)[colSums(is.na(cellDf)) > 0]
compSamples <- colnames(cellDf)[colSums(is.na(cellDf)) == 0]
print(paste("Cell Frequency Incomplete Sample Cases:", length(incompSamples)))
## [1] "Cell Frequency Incomplete Sample Cases: 5"
print(paste("Cell Frequency Complete Sample Cases:", length(compSamples)))
## [1] "Cell Frequency Complete Sample Cases: 160"
print(paste("Cell Frequency Feature Number:", nrow(cellDf)))
## [1] "Cell Frequency Feature Number: 22"
# Features (rows) with at least one missing value.
incompFeature <- rownames(cellDf)[rowSums(is.na(cellDf)) > 0]
print(paste("Cell Frequency Incomplete Feature Numbers:", length(incompFeature)))
## [1] "Cell Frequency Incomplete Feature Numbers: 8"
print(c("Cell Frequency Incomplete Feature:", incompFeature))
## [1] "Cell Frequency Incomplete Feature:" "NaiveCD4"
## [3] "TcmCD4" "TemraCD4"
## [5] "TemCD4" "NaiveCD8"
## [7] "TcmCD8" "TemCD8"
## [9] "TemraCD8"
# For every cell population with missing values, report which specimens and
# which subjects are affected.
for (f in incompFeature) {
  # Specimens (columns) whose value for feature `f` is NA. Indexing the column
  # names directly avoids building a one-column data frame per feature and
  # stays correct when the matrix has a single column.
  incompSamp <- colnames(cellDf)[is.na(cellDf[f, ])]
  incompSubj <- levels(factor(metaDf[metaDf$specimen_id %in% incompSamp, "subject_id"]))
  print(paste(f, "Number of Incomplete Samples:"))
  print(length(incompSamp))
  print(paste(f, "Incomplete Samples:"))
  print(incompSamp)
  print(paste(f, "Number of Incomplete Subjects:"))
  print(length(incompSubj))
  print(paste(f, "Incomplete Subjects:"))
  print(incompSubj)
}
## [1] "NaiveCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "NaiveCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "NaiveCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "NaiveCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TcmCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TcmCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TcmCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TcmCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TemraCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TemraCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemraCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemraCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TemCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TemCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemCD4 Incomplete Subjects:"
## [1] "70"
## [1] "NaiveCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "NaiveCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "NaiveCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "NaiveCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TcmCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TcmCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TcmCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TcmCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TemCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TemCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TemraCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TemraCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemraCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemraCD8 Incomplete Subjects:"
## [1] "70"
# ---- Cell frequency: overall missingness by specimen and subject ----
# Subjects owning at least one specimen with a missing value.
incompSubj <- levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "subject_id"]
))
print("Cell Frequency Incomplete Samples:")
## [1] "Cell Frequency Incomplete Samples:"
print(incompSamples)
## [1] "537" "538" "539" "540" "541"
print("Cell Frequency Incomplete Subjects:")
## [1] "Cell Frequency Incomplete Subjects:"
print(incompSubj)
## [1] "70"
# Distinct `timepoint` values of the incomplete specimens.
print(levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "timepoint"]
)))
## [1] "0" "1" "3" "7" "14"
for (s in incompSubj) {
  print(paste("Cell Frequency", s, "Timepoints of Incomplete Subjects:"))
  print(levels(factor(
    metaDf[which(metaDf$specimen_id %in% incompSamples & metaDf$subject_id == s), "timepoint"]
  )))
}
## [1] "Cell Frequency 70 Timepoints of Incomplete Subjects:"
## [1] "0" "1" "3" "7" "14"
print(paste("Cell Frequency Number of Incomplete Subjects:", length(incompSubj)))
## [1] "Cell Frequency Number of Incomplete Subjects: 1"
# Counts subjects that own at least one specimen listed in compSamples.
print(paste(
  "Cell Frequency Number of Complete Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% compSamples, "subject_id"]))
))
## [1] "Cell Frequency Number of Complete Subjects: 32"
print(paste(
  "Cell Frequency Number of All Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% colnames(cellDf), "subject_id"]))
))
## [1] "Cell Frequency Number of All Subjects: 33"
# print(paste("Cell Frequency Incomplete Subject Cases:", length(names(which(colSums(is.na(cellDf)) > 0)))))
# print(paste("Cell Frequency Complete Subject Cases:", length(names(which(colSums(is.na(cellDf)) == 0)))))
# print(paste("Cell Frequency Feature Subject Number:", dim(cellDf)[1])
# ---- Antibody titer: load, reshape to features x specimens, summarise NAs ----
abtiterDf <- read_tsv(
  here("./data/2021LD_plasma_ab_titer.tsv"),
  show_col_types = FALSE
)
abtiterDf <- abtiterDf[abtiterDf$specimen_id %in% samples, ]
# Feature name becomes "<isotype>_<antigen>".
abtiterDf[["antigen"]] <- paste(abtiterDf[["isotype"]], abtiterDf[["antigen"]], sep = "_")
abtiterDf <- abtiterDf[
  abtiterDf$antigen %in% antigens,
  c("specimen_id", "antigen", "MFI_normalised")
]
# Long -> wide: one row per specimen, one column per isotype/antigen feature.
abtiterDf <- pivot_wider(abtiterDf, names_from = "antigen",
                         values_from = "MFI_normalised")
abtiterDf <- as.data.frame(abtiterDf)
rownames(abtiterDf) <- abtiterDf[["specimen_id"]]
# Drop the id column and transpose so that rows are features.
abtiterDf <- t(abtiterDf[, !(names(abtiterDf) %in% "specimen_id")])
# Remove features whose variance across specimens is not positive.
abtiterDf <- abtiterDf[rowVars(abtiterDf, na.rm = TRUE) > 0, ]
# Specimens (columns) with / without at least one missing value.
incompSamples <- colnames(abtiterDf)[colSums(is.na(abtiterDf)) > 0]
compSamples <- colnames(abtiterDf)[colSums(is.na(abtiterDf)) == 0]
print(paste("Ab Titer Incomplete Sample Cases:", length(incompSamples)))
## [1] "Ab Titer Incomplete Sample Cases: 0"
print(paste("Ab Titer Complete Sample Cases:", length(compSamples)))
## [1] "Ab Titer Complete Sample Cases: 165"
print(paste("Ab Titer Feature Number:", nrow(abtiterDf)))
## [1] "Ab Titer Feature Number: 31"
# Features (rows) with at least one missing value.
incompFeature <- rownames(abtiterDf)[rowSums(is.na(abtiterDf)) > 0]
print(paste("Ab Titer Incomplete Feature Numbers:", length(incompFeature)))
## [1] "Ab Titer Incomplete Feature Numbers: 0"
print(c("Ab Titer Incomplete Feature:", incompFeature))
## [1] "Ab Titer Incomplete Feature:"
# For every antibody-titer feature with missing values, report which specimens
# and which subjects are affected.
for (f in incompFeature) {
  # Specimens (columns) whose value for feature `f` is NA. Indexing the column
  # names directly avoids building a one-column data frame per feature and
  # stays correct when the matrix has a single column.
  incompSamp <- colnames(abtiterDf)[is.na(abtiterDf[f, ])]
  incompSubj <- levels(factor(metaDf[metaDf$specimen_id %in% incompSamp, "subject_id"]))
  print(paste(f, "Number of Incomplete Samples:"))
  print(length(incompSamp))
  print(paste(f, "Incomplete Samples:"))
  print(incompSamp)
  print(paste(f, "Number of Incomplete Subjects:"))
  print(length(incompSubj))
  print(paste(f, "Incomplete Subjects:"))
  print(incompSubj)
}
# ---- Antibody titer: overall missingness by specimen and subject ----
# Subjects owning at least one specimen with a missing value.
incompSubj <- levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "subject_id"]
))
print("Ab Titer Incomplete Samples:")
## [1] "Ab Titer Incomplete Samples:"
print(incompSamples)
## character(0)
print("Ab Titer Incomplete Subjects:")
## [1] "Ab Titer Incomplete Subjects:"
print(incompSubj)
## character(0)
# Distinct `timepoint` values of the incomplete specimens.
print(levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "timepoint"]
)))
## character(0)
for (s in incompSubj) {
  print(paste("Ab Titer", s, "Timepoints of Incomplete Subjects:"))
  print(levels(factor(
    metaDf[which(metaDf$specimen_id %in% incompSamples & metaDf$subject_id == s), "timepoint"]
  )))
}
print(paste("Ab Titer Number of Incomplete Subjects:", length(incompSubj)))
## [1] "Ab Titer Number of Incomplete Subjects: 0"
# Counts subjects that own at least one specimen listed in compSamples.
print(paste(
  "Ab Titer Number of Complete Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% compSamples, "subject_id"]))
))
## [1] "Ab Titer Number of Complete Subjects: 33"
print(paste(
  "Ab Titer Number of All Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% colnames(abtiterDf), "subject_id"]))
))
## [1] "Ab Titer Number of All Subjects: 33"
# ---- RNA-seq: load, reshape to genes x specimens, filter, summarise NAs ----
rnaDf <- read_tsv(
  here("./data/2021LD_pbmc_gene_expression.tsv"),
  show_col_types = FALSE
)
rnaDf <- rnaDf[rnaDf$specimen_id %in% samples, ]
# Strip everything from the first "." onwards (the version suffix) from the ids.
rnaDf$versioned_ensembl_gene_id <- gsub("\\..*", "", rnaDf$versioned_ensembl_gene_id)
# print(length(levels(factor(rnaDf$versioned_ensembl_gene_id))))
rnaDf <- rnaDf[
  rnaDf$versioned_ensembl_gene_id %in% genes,
  c("specimen_id", "versioned_ensembl_gene_id", "tpm")
]
# Long -> wide: one row per specimen, one column per gene.
rnaDf <- pivot_wider(rnaDf, names_from = "versioned_ensembl_gene_id",
                     values_from = "tpm")
rnaDf <- as.data.frame(rnaDf)
rownames(rnaDf) <- rnaDf[["specimen_id"]]
# Drop the id column and transpose so that rows are genes.
rnaDf <- t(rnaDf[, !(names(rnaDf) %in% "specimen_id")])
# print(dim(rnaDf))
# Keep genes with tpm > 1 in more than 70% of the specimens. na.rm = TRUE stops
# a missing value from turning the row mask into NA, which would otherwise
# insert all-NA rows into the matrix; a missing value counts as "not > 1".
rnaDf <- rnaDf[(rowSums(rnaDf > 1, na.rm = TRUE) / ncol(rnaDf)) * 100 > 70, ]
# Remove genes whose variance across specimens is not positive.
rnaDf <- rnaDf[rowVars(rnaDf, na.rm = TRUE) > 0, ]
# Specimens (columns) with / without at least one missing value.
incompSamples <- names(which(colSums(is.na(rnaDf)) > 0))
compSamples <- names(which(colSums(is.na(rnaDf)) == 0))
print(paste("RNA seq Incomplete Sample Cases:", length(incompSamples)))
## [1] "RNA seq Incomplete Sample Cases: 0"
print(paste("RNA seq Complete Sample Cases:", length(compSamples)))
## [1] "RNA seq Complete Sample Cases: 180"
print(paste("RNA seq Feature Number:", nrow(rnaDf)))
## [1] "RNA seq Feature Number: 14053"
# print(dim(rnaDf))
# Genes (rows) with at least one missing value.
incompFeature <- names(which(rowSums(is.na(rnaDf)) > 0))
print(paste("RNA seq Incomplete Feature Numbers:", length(incompFeature)))
## [1] "RNA seq Incomplete Feature Numbers: 0"
print(c("RNA seq Incomplete Feature:", incompFeature))
## [1] "RNA seq Incomplete Feature:"
# For every gene with missing values, report which specimens and which
# subjects are affected.
for (f in incompFeature) {
  # Specimens (columns) whose value for feature `f` is NA. Indexing the column
  # names directly avoids building a one-column data frame per gene and stays
  # correct when the matrix has a single column.
  incompSamp <- colnames(rnaDf)[is.na(rnaDf[f, ])]
  incompSubj <- levels(factor(metaDf[metaDf$specimen_id %in% incompSamp, "subject_id"]))
  print(paste(f, "Number of Incomplete Samples:"))
  print(length(incompSamp))
  print(paste(f, "Incomplete Samples:"))
  print(incompSamp)
  print(paste(f, "Number of Incomplete Subjects:"))
  print(length(incompSubj))
  print(paste(f, "Incomplete Subjects:"))
  print(incompSubj)
}
# ---- RNA-seq: overall missingness by specimen and subject ----
# Subjects owning at least one specimen with a missing value.
incompSubj <- levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "subject_id"]
))
print("RNA seq Incomplete Samples:")
## [1] "RNA seq Incomplete Samples:"
print(incompSamples)
## character(0)
print("RNA seq Incomplete Subjects:")
## [1] "RNA seq Incomplete Subjects:"
print(incompSubj)
## character(0)
# Distinct `timepoint` values of the incomplete specimens.
print(levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "timepoint"]
)))
## character(0)
for (s in incompSubj) {
  print(paste("RNA seq", s, "Timepoints of Incomplete Subjects:"))
  # samp <-
  print(levels(factor(
    metaDf[which(metaDf$specimen_id %in% incompSamples & metaDf$subject_id == s), "timepoint"]
  )))
}
print(paste("RNA seq Number of Incomplete Subjects:", length(incompSubj)))
## [1] "RNA seq Number of Incomplete Subjects: 0"
# Counts subjects that own at least one specimen listed in compSamples.
print(paste(
  "RNA seq Number of Complete Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% compSamples, "subject_id"]))
))
## [1] "RNA seq Number of Complete Subjects: 36"
print(paste(
  "RNA seq Number of All Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% colnames(rnaDf), "subject_id"]))
))
## [1] "RNA seq Number of All Subjects: 36"
# ---- Olink: load, reshape to proteins x specimens, summarise NAs ----
olinkDf <- read_tsv(
  here("./data/2021LD_plasma_cytokine_concentration.tsv"),
  show_col_types = FALSE
)
olinkDf <- olinkDf[olinkDf$specimen_id %in% samples, ]
olinkDf <- olinkDf[
  olinkDf$protein_id %in% proteins,
  c("specimen_id", "protein_id", "protein_expression")
]
# Long -> wide: one row per specimen, one column per protein.
olinkDf <- pivot_wider(olinkDf, names_from = "protein_id",
                       values_from = "protein_expression")
olinkDf <- as.data.frame(olinkDf)
rownames(olinkDf) <- olinkDf[["specimen_id"]]
# Drop the id column and transpose so that rows are proteins.
olinkDf <- t(olinkDf[, !(names(olinkDf) %in% "specimen_id")])
# Remove proteins whose variance across specimens is not positive.
olinkDf <- olinkDf[rowVars(olinkDf, na.rm = TRUE) > 0, ]
# Specimens (columns) with / without at least one missing value.
incompSamples <- colnames(olinkDf)[colSums(is.na(olinkDf)) > 0]
compSamples <- colnames(olinkDf)[colSums(is.na(olinkDf)) == 0]
print(paste("Olink Incomplete Sample Cases:", length(incompSamples)))
## [1] "Olink Incomplete Sample Cases: 23"
print(paste("Olink Complete Sample Cases:", length(compSamples)))
## [1] "Olink Complete Sample Cases: 157"
print(paste("Olink Feature Number:", nrow(olinkDf)))
## [1] "Olink Feature Number: 30"
# Proteins (rows) with at least one missing value.
incompFeature <- rownames(olinkDf)[rowSums(is.na(olinkDf)) > 0]
print(paste("Olink Incomplete Feature Numbers:", length(incompFeature)))
## [1] "Olink Incomplete Feature Numbers: 3"
print(c("Olink Incomplete Feature:", incompFeature))
## [1] "Olink Incomplete Feature:" "O95760"
## [3] "P60568" "P35225"
# For every protein with missing values, report which specimens and which
# subjects are affected.
for (f in incompFeature) {
  # Specimens (columns) whose value for feature `f` is NA. Indexing the column
  # names directly avoids building a one-column data frame per protein and
  # stays correct when the matrix has a single column.
  incompSamp <- colnames(olinkDf)[is.na(olinkDf[f, ])]
  incompSubj <- levels(factor(metaDf[metaDf$specimen_id %in% incompSamp, "subject_id"]))
  print(paste(f, "Number of Incomplete Samples:"))
  print(length(incompSamp))
  print(paste(f, "Incomplete Samples:"))
  print(incompSamp)
  print(paste(f, "Number of Incomplete Subjects:"))
  print(length(incompSubj))
  print(paste(f, "Incomplete Subjects:"))
  print(incompSubj)
}
## [1] "O95760 Number of Incomplete Samples:"
## [1] 2
## [1] "O95760 Incomplete Samples:"
## [1] "696" "619"
## [1] "O95760 Number of Incomplete Subjects:"
## [1] 2
## [1] "O95760 Incomplete Subjects:"
## [1] "80" "92"
## [1] "P60568 Number of Incomplete Samples:"
## [1] 16
## [1] "P60568 Incomplete Samples:"
## [1] "472" "643" "483" "486" "586" "589" "569" "664" "517" "668" "513" "516"
## [13] "611" "698" "699" "688"
## [1] "P60568 Number of Incomplete Subjects:"
## [1] 10
## [1] "P60568 Incomplete Subjects:"
## [1] "61" "63" "67" "74" "76" "79" "84" "87" "91" "92"
## [1] "P35225 Number of Incomplete Samples:"
## [1] 6
## [1] "P35225 Incomplete Samples:"
## [1] "469" "547" "548" "569" "563" "709"
## [1] "P35225 Number of Incomplete Subjects:"
## [1] 5
## [1] "P35225 Incomplete Subjects:"
## [1] "61" "71" "73" "74" "94"
# ---- Olink: overall missingness by specimen and subject ----
# Subjects owning at least one specimen with a missing value.
incompSubj <- levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "subject_id"]
))
print("Olink Incomplete Samples:")
## [1] "Olink Incomplete Samples:"
print(incompSamples)
## [1] "472" "469" "547" "548" "643" "483" "486" "586" "589" "569" "664" "563"
## [13] "517" "668" "513" "516" "696" "611" "698" "699" "688" "619" "709"
print("Olink Incomplete Subjects:")
## [1] "Olink Incomplete Subjects:"
print(incompSubj)
## [1] "61" "63" "67" "71" "73" "74" "76" "79" "80" "84" "87" "91" "92" "94"
# Distinct `timepoint` values of the incomplete specimens.
print(levels(factor(
  metaDf[metaDf$specimen_id %in% incompSamples, "timepoint"]
)))
## [1] "0" "1" "3" "7" "14"
for (s in incompSubj) {
  print(paste("Olink", s, "Timepoints of Incomplete Subjects:"))
  # samp <-
  print(levels(factor(
    metaDf[which(metaDf$specimen_id %in% incompSamples & metaDf$subject_id == s), "timepoint"]
  )))
}
## [1] "Olink 61 Timepoints of Incomplete Subjects:"
## [1] "1" "14"
## [1] "Olink 63 Timepoints of Incomplete Subjects:"
## [1] "0" "7"
## [1] "Olink 67 Timepoints of Incomplete Subjects:"
## [1] "0" "7" "14"
## [1] "Olink 71 Timepoints of Incomplete Subjects:"
## [1] "1" "3"
## [1] "Olink 73 Timepoints of Incomplete Subjects:"
## [1] "1"
## [1] "Olink 74 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 76 Timepoints of Incomplete Subjects:"
## [1] "1" "14"
## [1] "Olink 79 Timepoints of Incomplete Subjects:"
## [1] "7"
## [1] "Olink 80 Timepoints of Incomplete Subjects:"
## [1] "7"
## [1] "Olink 84 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 87 Timepoints of Incomplete Subjects:"
## [1] "0" "14"
## [1] "Olink 91 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 92 Timepoints of Incomplete Subjects:"
## [1] "1" "7" "14"
## [1] "Olink 94 Timepoints of Incomplete Subjects:"
## [1] "0"
# Subject-level counts for the Olink assay. A subject is counted in a group
# when it owns at least one specimen in that group, so the "incomplete" and
# "complete" counts can overlap.
print(paste(
  "Olink Number of Incomplete Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% incompSamples, "subject_id"]))
))
## [1] "Olink Number of Incomplete Subjects: 14"
print(paste(
  "Olink Number of Complete Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% compSamples, "subject_id"]))
))
## [1] "Olink Number of Complete Subjects: 36"
print(paste(
  "Olink Number of All Subjects:",
  nlevels(factor(metaDf[metaDf$specimen_id %in% colnames(olinkDf), "subject_id"]))
))
## [1] "Olink Number of All Subjects: 36"
# Collect the four feature-by-specimen matrices under dataList$original.
dataList <- list(
  original = list(
    abtiter = abtiterDf,
    cytof = cellDf,
    olink = olinkDf,
    rnaseq = rnaDf
  )
)
# Number of neighbours used for k-nearest-neighbour imputation in add_cols().
K <- 20
# int_cols <- Reduce(intersect, lapply(dataList$original[c("abtiter", "cytof", "olink", "rnaseq")], colnames))
# cols <- unique(c(int_cols, colnames(dataList$original[["rnaseq"]])))
# The RNA-seq specimens define the common set of columns for every assay.
cols <- colnames(dataList[["original"]][["rnaseq"]])
#' Align one assay matrix to a common set of samples, imputing and padding.
#'
#' Prints a missingness report for `df`, fills NA entries with `impute.knn()`
#' when at least one feature is incomplete, and appends an all-NA column for
#' every requested sample that is absent from `df`.
#'
#' @param df Matrix with features in rows and samples in columns. Column names
#'   are matched against `cols` and against `meta$specimen_id`.
#' @param cols Character vector of sample ids the result must contain.
#' @param exp Label for the assay; used only in the printed report.
#' @param meta Sample metadata with `specimen_id`, `subject_id` and `timepoint`
#'   columns. Defaults to the global `metaDf`.
#' @param k Number of neighbours passed to `impute.knn()`. Defaults to the
#'   global `K`.
#' @return A matrix with the rows of `df` and one column per element of `cols`,
#'   ordered by `sort(cols)`.
add_cols <- function(df, cols, exp, meta = metaDf, k = K) {
  # Sorted distinct values of a metadata column for the given specimen ids.
  meta_levels <- function(ids, column) {
    levels(factor(meta[meta$specimen_id %in% ids, column]))
  }

  print(paste("************************", exp, "********************************"))
  # drop = FALSE keeps a matrix even when only one sample is selected.
  df <- df[, colnames(df) %in% cols, drop = FALSE]
  # Requested samples that this assay does not contain.
  add <- setdiff(cols, colnames(df))

  allSubj <- meta_levels(colnames(df), "subject_id")
  print(paste(exp, "All the Subjects:"))
  print(allSubj)
  print(paste(exp, "Number of All the Subjects:", length(allSubj)))
  # print(df[, !colnames(df) %in% add])

  # Samples (columns) and features (rows) with at least one missing value.
  naPerSample <- colSums(is.na(df))
  incompSamples <- names(which(naPerSample > 0))
  compSamples <- names(which(naPerSample == 0))
  # print(colnames(df))
  # print(add)
  # print(incompSamples)
  print(paste(exp, "Number of selected Samples:", ncol(df)))
  print(paste(exp, "Number of Features:", nrow(df)))
  incompFeature <- names(which(rowSums(is.na(df)) > 0))
  print(paste(exp, "Number of Incomplete Features:", length(incompFeature)))
  print(paste(exp, "Incomplete Features:", incompFeature))

  # Per-feature breakdown of the affected samples and subjects.
  for (f in incompFeature) {
    incompSamp <- colnames(df)[is.na(df[f, ])]
    incompSubj <- meta_levels(incompSamp, "subject_id")
    print(paste(f, "Number of Incomplete Samples:"))
    print(length(incompSamp))
    print(paste(f, "Incomplete Samples:"))
    print(incompSamp)
    print(paste(f, "Number of Incomplete Subjects:"))
    print(length(incompSubj))
    print(paste(f, "Incomplete Subjects:"))
    print(incompSubj)
  }

  # Overall breakdown across all features.
  incompSubj <- meta_levels(incompSamples, "subject_id")
  print(paste(exp, "Incomplete Samples:"))
  print(incompSamples)
  print(paste(exp, "Incomplete Subjects:"))
  print(incompSubj)
  print(meta_levels(incompSamples, "timepoint"))
  for (s in incompSubj) {
    print(paste(exp, s, "Timepoints of Incomplete Subjects:"))
    hit <- which(meta$specimen_id %in% incompSamples & meta$subject_id == s)
    print(levels(factor(meta[hit, "timepoint"])))
  }

  missingSubj <- meta_levels(add, "subject_id")
  print(paste(exp, "Number of Missing Subjects:", length(missingSubj)))
  print(paste(exp, "Missing Subjects:"))
  print(missingSubj)
  print(paste(exp, "Number of Incomplete Subjects:", length(incompSubj)))
  # Counts subjects owning at least one sample without missing values.
  print(paste(exp, "Number of Complete Subjects:",
              length(meta_levels(compSamples, "subject_id"))))
  print(paste(exp, "Number of All Subjects:", length(allSubj)))
  print(paste(exp, "Number of Missing Samples:", length(add)))

  if (length(incompFeature) > 0) {
    # Fixed seed, presumably so that repeated runs give the same imputation.
    set.seed(1)
    print(paste(exp, "Impute Missing Features for:", incompFeature))
    print("====================================================================")
    # The matrix is transposed before imputation and transposed back, so
    # impute.knn() sees samples as rows and features as columns.
    df <- t(impute.knn(t(df), k = k)$data)
  }

  # Pad with one all-NA column per missing sample; building the padding as a
  # matrix keeps `df` a matrix throughout.
  if (length(add) > 0) {
    pad <- matrix(NA_real_, nrow = nrow(df), ncol = length(add),
                  dimnames = list(rownames(df), add))
    df <- cbind(df, pad)
  }
  print(paste(exp, "Number of all Samples:", ncol(df)))
  print("********************************************************************")
  as.matrix(df[, sort(cols), drop = FALSE])
}
# dataList$addedMissingVals[["rnaseq"]] <- add_cols(rnaDf[, int_cols], cols, "RNA seq")
# Align the RNA-seq matrix to the common specimen set (report is printed).
dataList[["addedMissingVals"]][["rnaseq"]] <- add_cols(df = rnaDf, cols = cols, exp = "RNA seq")
## [1] "************************ RNA seq ********************************"
## [1] "RNA seq All the Subjects:"
## [1] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" "73" "74" "75"
## [16] "76" "77" "78" "79" "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "90"
## [31] "91" "92" "93" "94" "95" "96"
## [1] "RNA seq Number of All the Subjects: 36"
## [1] "RNA seq Number of selected Samples: 180"
## [1] "RNA seq Number of Features: 14053"
## [1] "RNA seq Number of Incomplete Features: 0"
## [1] "RNA seq Incomplete Features: "
## [1] "RNA seq Incomplete Samples:"
## character(0)
## [1] "RNA seq Incomplete Subjects:"
## character(0)
## character(0)
## [1] "RNA seq Number of Missing Subjects: 0"
## [1] "RNA seq Missing Subjects:"
## character(0)
## [1] "RNA seq Number of Incomplete Subjects: 0"
## [1] "RNA seq Number of Complete Subjects: 36"
## [1] "RNA seq Number of All Subjects: 36"
## [1] "RNA seq Number of Missing Samples: 0"
## [1] "RNA seq Number of all Samples: 180"
## [1] "********************************************************************"
# Align the antibody-titer matrix to the common specimen set (report is printed).
dataList[["addedMissingVals"]][["abtiter"]] <- add_cols(df = abtiterDf, cols = cols, exp = "Ab-titer")
## [1] "************************ Ab-titer ********************************"
## [1] "Ab-titer All the Subjects:"
## [1] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" "73" "74" "75"
## [16] "76" "77" "78" "79" "80" "81" "83" "84" "85" "86" "89" "90" "91" "92" "93"
## [31] "94" "95" "96"
## [1] "Ab-titer Number of All the Subjects: 33"
## [1] "Ab-titer Number of selected Samples: 165"
## [1] "Ab-titer Number of Features: 31"
## [1] "Ab-titer Number of Incomplete Features: 0"
## [1] "Ab-titer Incomplete Features: "
## [1] "Ab-titer Incomplete Samples:"
## character(0)
## [1] "Ab-titer Incomplete Subjects:"
## character(0)
## character(0)
## [1] "Ab-titer Number of Missing Subjects: 3"
## [1] "Ab-titer Missing Subjects:"
## [1] "82" "87" "88"
## [1] "Ab-titer Number of Incomplete Subjects: 0"
## [1] "Ab-titer Number of Complete Subjects: 33"
## [1] "Ab-titer Number of All Subjects: 33"
## [1] "Ab-titer Number of Missing Samples: 15"
## [1] "Ab-titer Number of all Samples: 180"
## [1] "********************************************************************"
# Align the cell-frequency matrix to the common specimen set (report is printed).
dataList[["addedMissingVals"]][["cytof"]] <- add_cols(df = cellDf, cols = cols, exp = "Cell Freq")
## [1] "************************ Cell Freq ********************************"
## [1] "Cell Freq All the Subjects:"
## [1] "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" "73" "74" "76" "77" "78"
## [16] "79" "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "90" "91" "92" "93"
## [31] "94" "95" "96"
## [1] "Cell Freq Number of All the Subjects: 33"
## [1] "Cell Freq Number of selected Samples: 165"
## [1] "Cell Freq Number of Features: 22"
## [1] "Cell Freq Number of Incomplete Features: 8"
## [1] "Cell Freq Incomplete Features: NaiveCD4"
## [2] "Cell Freq Incomplete Features: TcmCD4"
## [3] "Cell Freq Incomplete Features: TemraCD4"
## [4] "Cell Freq Incomplete Features: TemCD4"
## [5] "Cell Freq Incomplete Features: NaiveCD8"
## [6] "Cell Freq Incomplete Features: TcmCD8"
## [7] "Cell Freq Incomplete Features: TemCD8"
## [8] "Cell Freq Incomplete Features: TemraCD8"
## [1] "NaiveCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "NaiveCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "NaiveCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "NaiveCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TcmCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TcmCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TcmCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TcmCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TemraCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TemraCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemraCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemraCD4 Incomplete Subjects:"
## [1] "70"
## [1] "TemCD4 Number of Incomplete Samples:"
## [1] 5
## [1] "TemCD4 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemCD4 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemCD4 Incomplete Subjects:"
## [1] "70"
## [1] "NaiveCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "NaiveCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "NaiveCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "NaiveCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TcmCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TcmCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TcmCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TcmCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TemCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TemCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemCD8 Incomplete Subjects:"
## [1] "70"
## [1] "TemraCD8 Number of Incomplete Samples:"
## [1] 5
## [1] "TemraCD8 Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "TemraCD8 Number of Incomplete Subjects:"
## [1] 1
## [1] "TemraCD8 Incomplete Subjects:"
## [1] "70"
## [1] "Cell Freq Incomplete Samples:"
## [1] "537" "538" "539" "540" "541"
## [1] "Cell Freq Incomplete Subjects:"
## [1] "70"
## [1] "0" "1" "3" "7" "14"
## [1] "Cell Freq 70 Timepoints of Incomplete Subjects:"
## [1] "0" "1" "3" "7" "14"
## [1] "Cell Freq Number of Missing Subjects: 3"
## [1] "Cell Freq Missing Subjects:"
## [1] "61" "62" "75"
## [1] "Cell Freq Number of Incomplete Subjects: 1"
## [1] "Cell Freq Number of Complete Subjects: 32"
## [1] "Cell Freq Number of All Subjects: 33"
## [1] "Cell Freq Number of Missing Samples: 15"
## [1] "Cell Freq Impute Missing Features for: NaiveCD4"
## [2] "Cell Freq Impute Missing Features for: TcmCD4"
## [3] "Cell Freq Impute Missing Features for: TemraCD4"
## [4] "Cell Freq Impute Missing Features for: TemCD4"
## [5] "Cell Freq Impute Missing Features for: NaiveCD8"
## [6] "Cell Freq Impute Missing Features for: TcmCD8"
## [7] "Cell Freq Impute Missing Features for: TemCD8"
## [8] "Cell Freq Impute Missing Features for: TemraCD8"
## [1] "===================================================================="
## [1] "Cell Freq Number of all Samples: 180"
## [1] "********************************************************************"
# Align the Olink matrix to the common specimen set (report is printed).
dataList[["addedMissingVals"]][["olink"]] <- add_cols(df = olinkDf, cols = cols, exp = "Olink")
## [1] "************************ Olink ********************************"
## [1] "Olink All the Subjects:"
## [1] "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71" "72" "73" "74" "75"
## [16] "76" "77" "78" "79" "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "90"
## [31] "91" "92" "93" "94" "95" "96"
## [1] "Olink Number of All the Subjects: 36"
## [1] "Olink Number of selected Samples: 180"
## [1] "Olink Number of Features: 30"
## [1] "Olink Number of Incomplete Features: 3"
## [1] "Olink Incomplete Features: O95760" "Olink Incomplete Features: P60568"
## [3] "Olink Incomplete Features: P35225"
## [1] "O95760 Number of Incomplete Samples:"
## [1] 2
## [1] "O95760 Incomplete Samples:"
## [1] "696" "619"
## [1] "O95760 Number of Incomplete Subjects:"
## [1] 2
## [1] "O95760 Incomplete Subjects:"
## [1] "80" "92"
## [1] "P60568 Number of Incomplete Samples:"
## [1] 16
## [1] "P60568 Incomplete Samples:"
## [1] "472" "643" "483" "486" "586" "589" "569" "664" "517" "668" "513" "516"
## [13] "611" "698" "699" "688"
## [1] "P60568 Number of Incomplete Subjects:"
## [1] 10
## [1] "P60568 Incomplete Subjects:"
## [1] "61" "63" "67" "74" "76" "79" "84" "87" "91" "92"
## [1] "P35225 Number of Incomplete Samples:"
## [1] 6
## [1] "P35225 Incomplete Samples:"
## [1] "469" "547" "548" "569" "563" "709"
## [1] "P35225 Number of Incomplete Subjects:"
## [1] 5
## [1] "P35225 Incomplete Subjects:"
## [1] "61" "71" "73" "74" "94"
## [1] "Olink Incomplete Samples:"
## [1] "472" "469" "547" "548" "643" "483" "486" "586" "589" "569" "664" "563"
## [13] "517" "668" "513" "516" "696" "611" "698" "699" "688" "619" "709"
## [1] "Olink Incomplete Subjects:"
## [1] "61" "63" "67" "71" "73" "74" "76" "79" "80" "84" "87" "91" "92" "94"
## [1] "0" "1" "3" "7" "14"
## [1] "Olink 61 Timepoints of Incomplete Subjects:"
## [1] "1" "14"
## [1] "Olink 63 Timepoints of Incomplete Subjects:"
## [1] "0" "7"
## [1] "Olink 67 Timepoints of Incomplete Subjects:"
## [1] "0" "7" "14"
## [1] "Olink 71 Timepoints of Incomplete Subjects:"
## [1] "1" "3"
## [1] "Olink 73 Timepoints of Incomplete Subjects:"
## [1] "1"
## [1] "Olink 74 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 76 Timepoints of Incomplete Subjects:"
## [1] "1" "14"
## [1] "Olink 79 Timepoints of Incomplete Subjects:"
## [1] "7"
## [1] "Olink 80 Timepoints of Incomplete Subjects:"
## [1] "7"
## [1] "Olink 84 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 87 Timepoints of Incomplete Subjects:"
## [1] "0" "14"
## [1] "Olink 91 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink 92 Timepoints of Incomplete Subjects:"
## [1] "1" "7" "14"
## [1] "Olink 94 Timepoints of Incomplete Subjects:"
## [1] "0"
## [1] "Olink Number of Missing Subjects: 0"
## [1] "Olink Missing Subjects:"
## character(0)
## [1] "Olink Number of Incomplete Subjects: 14"
## [1] "Olink Number of Complete Subjects: 36"
## [1] "Olink Number of All Subjects: 36"
## [1] "Olink Number of Missing Samples: 0"
## [1] "Olink Impute Missing Features for: O95760"
## [2] "Olink Impute Missing Features for: P60568"
## [3] "Olink Impute Missing Features for: P35225"
## [1] "===================================================================="
## [1] "Olink Number of all Samples: 180"
## [1] "********************************************************************"