suppressPackageStartupMessages({
# BiocManager
library(GenomicSuperSignature)
library(curatedTCGAData)
library(MultiAssayExperiment)
library(TCGAutils)
library(ComplexHeatmap)
# CRAN
library(tidyverse) # includes dplyr, ggplot2, magrittr, tidyr
library(magick)
library(wordcloud)
library(ztable)
library(metafolio)
})
RAVmodel <- getModel('C2', load=TRUE)
## [1] "downloading"
coad <- curatedTCGAData(diseaseCode = 'COAD',
assays = 'RNA*',
version = '2.0.1',
dry.run = FALSE)
coad_rna <- getWithColData(coad,
'COAD_RNASeq2Gene-20160128')
assay(coad_rna) <- log2(assay(coad_rna) + 1)
#head(colData(coad_rna)[,1:4])
# Clean MSI Data
table(coad$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
##
## indeterminate msi-h msi-l mss
## 2 83 82 288
# Conditions and Replacement Values
conditions <- c("indeterminate", "msi-l")
replacements <- c(NA, "mss")
coad$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status <-
replace(coad$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status,
coad$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status %in% conditions, replacements)
table(coad$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
##
## msi-h mss
## 83 330
## Parse out cancer vs normal samples
sampleTables(coad)
## $`COAD_RNASeq2Gene-20160128`
##
## 01 02 06 11
## 283 1 1 41
##
## $`COAD_RNASeq2GeneNorm_illuminaga-20160128`
##
## 01
## 191
##
## $`COAD_RNASeq2GeneNorm_illuminahiseq-20160128`
##
## 01 02 06 11
## 283 1 1 41
##
## $`COAD_RNASeqGene-20160128`
##
## 01
## 10
coad2 <- TCGAsplitAssays(coad, c("01", "11"))
coad_rna_cancer <- getWithColData(coad2,
'01_COAD_RNASeq2Gene-20160128')
assay(coad_rna_cancer) <- log2(assay(coad_rna_cancer) + 1)
coad_cancer <- colData(coad_rna_cancer)
coad_rna_normal <- getWithColData(coad2,
'11_COAD_RNASeq2Gene-20160128')
assay(coad_rna_normal) <- log2(assay(coad_rna_normal) + 1)
coad_normal <- colData(coad_rna_normal)
table(coad_cancer$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
##
## msi-h mss
## 52 206
table(coad_normal$patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
##
## msi-h mss
## 9 28
normal_tb <- coad_normal %>%
as.data.frame() %>%
select(patientID, patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
cancer_tb <- coad_cancer %>%
as.data.frame() %>%
select(patientID, patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status)
combined_tb <- inner_join(normal_tb, cancer_tb, by="patientID")
#validate_coad_rna <- validate(coad_rna, RAVmodel)
validate_coad_normal <- validate(coad_rna_normal, RAVmodel)
validate_coad_cancer <- validate(coad_rna_cancer, RAVmodel)
#heatmapTable(validate_coad_rna, RAVmodel, num.out = 10)
heatmapTable(validate_coad_normal, RAVmodel, num.out = 10, column_title="COAD Normal Samples")
## RAV1187 can be filtered based on GSEA_C2
heatmapTable(validate_coad_cancer, RAVmodel, num.out=10, column_title="COAD Cancer Samples")
## RAV61 can be filtered based on GSEA_C2
# Subset ## Filter attributes
# Cancer
sparsity_summary <- table(colSums(is.na(coad_cancer)))
sparsity_summary
##
## 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 17 18 19 20 23
## 79 75 20 73 2 17 2 2 1 24 3 1 25 2 1 1 1 3 1 4
## 25 30 32 33 35 36 37 38 40 41 43 44 45 48 50 54 56 57 58 59
## 2 1 1 2 16 2 2 2 13 1 1 9 1 1 1 1 13 2 1 3
## 63 64 65 66 67 68 69 72 80 81 95 96 97 98 100 105 106 112 139 148
## 8 3 5 2 3 5 2 2 3 1 5 2 1 1 1 1 1 1 1 1
## 149 154 176 177 178 179 183 184 188 189 193 198 199 204 205 206 208 209 210 211
## 1 2 5 8 2 1 1 2 1 1 3 1 1 2 9 2 1 2 5 9
## 212 213 214 215 217 218 220 223 224 225 226 227 228 230 231 232 233 234 235 236
## 2 1 2 1 3 1 1 5 1 4 2 1 1 2 15 1 3 1 13 4
## 237 238 239 240 241 242 244 246 247 248 249 250 251 252 253 254 255 256 257 258
## 1 1 5 14 6 3 2 7 3 2 3 1 1 13 2 55 5 61 18 30
## 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
## 53 4 11 6 3 10 10 16 1 13 2 41 5 43 34 44 17 8 54 103
## 279 280 281 282 283
## 280 287 115 287 351
# Normal
sparsity_summary_2 <- table(colSums(is.na(coad_normal)))
sparsity_summary_2
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 228 153 11 5 4 24 5 4 11 4 1 3 1 2 2 1 3 109 13 4
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 24 62 68 48 47 16 17 1 13 39 13 13 14 24 15 42 45 36 242 188
## 40 41
## 583 477
# Cancer
plot(stack(sparsity_summary)$ind,
stack(sparsity_summary)$values)
# Normal
plot(stack(sparsity_summary_2)$ind,
stack(sparsity_summary_2)$values)
# Select columns with >10% completeness
# Cancer
keep_attribute_ind <- which(colSums(!is.na(coad_cancer)) > round(nrow(coad_cancer)/10))
meta_sub1 <- coad_cancer[keep_attribute_ind]
meta_sub1 <- subset(coad_cancer, select= -patientID)
# Normal
keep_attribute_ind_2 <- which(colSums(!is.na(coad_normal)) > round(nrow(coad_normal)/10))
meta_sub1_2 <- coad_normal[keep_attribute_ind]
meta_sub1_2 <- subset(coad_normal, select= -patientID)
# Randomly select for 140 rows
# Cancer
set.seed(1)
random_sample_ind <- sample(1:nrow(meta_sub1), 140)
meta_sub2 <- meta_sub1[random_sample_ind,]
# Normal
set.seed(2)
random_sample_ind_2 <- sample(1:nrow(meta_sub1_2), 20)
meta_sub2_2 <- meta_sub1_2[random_sample_ind_2,]
# Check for data types in listData
# Cancer
unique(sapply(coad_cancer@listData, type))
## [1] "character" "integer" "double"
charcTb <- meta_sub2[, sapply(meta_sub2, class) == 'character']
numTb <- meta_sub2[, sapply(meta_sub2, class) %in% c('numeric', 'integer')]
# Sort numeric variables that have <=5 unique values as character variables
addToFactors <- c()
for (i in 1:length(numTb)) {
if (length(table(numTb[i])) <= 3) {
addToFactors <- c(addToFactors, i)
}
}
charcTb <- c(charcTb, numTb[addToFactors])
numTb <- numTb[-addToFactors]
# Normal
unique(sapply(coad_normal@listData, type))
## [1] "character" "integer" "double"
charcTb_2 <- meta_sub2_2[, sapply(meta_sub2_2, class) == 'character']
numTb_2 <- meta_sub2_2[, sapply(meta_sub2_2, class) %in% c('numeric', 'integer')]
# Sort numeric variables that have <=5 unique values as character variables
addToFactors_2 <- c()
for (i in 1:length(numTb_2)) {
if (length(table(numTb_2[i])) <= 3) {
addToFactors_2 <- c(addToFactors_2, i)
}
}
charcTb_2 <- c(charcTb_2, numTb_2[addToFactors_2])
numTb_2 <- numTb_2[-addToFactors_2]
# Calculate validation scores
# Cancer
sampleScore <- calculateScore(coad_rna_cancer, RAVmodel)
# Normal
sampleScore_2 <- calculateScore(coad_rna_normal, RAVmodel)
# Cancer
validated_ind <- validatedSignatures(validate_coad_cancer, RAVmodel, num.out = 15, scoreCutoff = 0.45, indexOnly = TRUE)
## RAV61 can be filtered based on GSEA_C2
## RAV438 can be filtered based on GSEA_C2
## Subset sampleScore to join with MCPcounter
sampleScore_sub <- sampleScore[random_sample_ind, validated_ind] %>% as.data.frame()
# Normal
validated_ind_2 <- validatedSignatures(validate_coad_normal, RAVmodel, num.out = 15, scoreCutoff = 0.45, indexOnly = TRUE)
## Subset sampleScore to join with MCPcounter
sampleScore_sub_2 <- sampleScore[random_sample_ind_2, validated_ind_2] %>% as.data.frame()
# Convert to factor data type
# Cancer
factorTb <- meta_sub2[, sapply(meta_sub1, class) == 'character']
factorTb[sapply(factorTb, is.character)] <- lapply(factorTb[sapply(factorTb, is.character)], factor, exclude = NULL)
#any(is.na(levels(factorTb[,2])))
#!any(is.na(levels(factorTb[,6])))
single_fac_ind <- c()
binary_fac_ind <- c()
multi_fac_ind <- c()
# Testing factor grouping
for (i in 1:length(factorTb)) {
if (nlevels(factorTb[,i]) == 1 |
(nlevels(factorTb[,i]) == 2 &
any(is.na(levels(factorTb[,i]))))
) {
single_fac_ind <- c(single_fac_ind, i)
} else if (nlevels(factorTb[,i]) == 3 &
any(is.na(levels(factorTb[,i]))) |
(nlevels(factorTb[,i]) == 2 &
!any(is.na(levels(factorTb[,i]))))
) {
binary_fac_ind <- c(binary_fac_ind, i)
} else {
multi_fac_ind <- c(multi_fac_ind, i)
}
}
new_factorTb <- factorTb[,multi_fac_ind]
binary_factor <- factorTb[,binary_fac_ind]
single_factor <- factorTb[,single_fac_ind]
# Normal
factorTb_2 <- meta_sub2_2[, sapply(meta_sub1_2, class) == 'character']
factorTb_2[sapply(factorTb_2, is.character)] <- lapply(factorTb_2[sapply(factorTb_2, is.character)], factor, exclude = NULL)
single_fac_ind_2 <- c()
binary_fac_ind_2 <- c()
multi_fac_ind_2 <- c()
# Testing factor grouping
for (i in 1:length(factorTb_2)) {
if (nlevels(factorTb_2[,i]) == 1 |
(nlevels(factorTb_2[,i]) == 2 &
any(is.na(levels(factorTb_2[,i]))))
) {
single_fac_ind_2 <- c(single_fac_ind_2, i)
} else if (nlevels(factorTb_2[,i]) == 3 &
any(is.na(levels(factorTb_2[,i]))) |
(nlevels(factorTb_2[,i]) == 2 &
!any(is.na(levels(factorTb_2[,i]))))
) {
binary_fac_ind_2 <- c(binary_fac_ind_2, i)
} else {
multi_fac_ind_2 <- c(multi_fac_ind_2, i)
}
}
new_factorTb_norm <- factorTb_2[,multi_fac_ind_2]
binary_factor_norm <- factorTb_2[,binary_fac_ind_2]
single_factor_norm <- factorTb_2[,single_fac_ind_2]
#Remove factors without enough y-observations
# Cancer
remove_index <- c()
for (i in seq_len(ncol(binary_factor))) {
x <- summary(binary_factor[,i])[1:2]
if (all(x > 1)) {
#print(x)
} else {
remove_index <- c(remove_index, i)
#print(paste("index", i, "does not have enough observations"))
}
}
binary_factor_2 <- binary_factor[,-remove_index]
wilcox_test_res <- as.data.frame(matrix(nrow = ncol(binary_factor_2),
ncol = ncol(sampleScore_sub)))
rownames(wilcox_test_res) <- colnames(binary_factor_2)
colnames(wilcox_test_res) <- colnames(sampleScore_sub)
wtest_coad_wvalue <- wilcox_test_res
wtest_coad_pvalue <- wilcox_test_res
for (i in seq_len(ncol(sampleScore_sub))) {
for (j in seq_len(ncol(binary_factor_2))) {
## wilcoxon test
wilcox_test <- wilcox.test(sampleScore_sub[, i] ~ binary_factor_2[,j], alternative="two.sided")
## W value
wval <- wilcox_test$statistic
wtest_coad_wvalue[j, i] <- wval
## p-value
pval <- wilcox_test$p.value
wtest_coad_pvalue[j, i] <- pval
}
}
batch_char_ind <- grep('analyte|analytes|portion|procurement|aliquot|uuid|barcode',
rownames(wtest_coad_wvalue))
wtest_coad_wvalue_2 <- wtest_coad_wvalue[-batch_char_ind,]
wtest_coad_pvalue_2 <- wtest_coad_pvalue[-batch_char_ind,]
# Normal
remove_index_2 <- c()
for (i in seq_len(ncol(binary_factor_norm))) {
x <- summary(binary_factor_norm[,i])[1:2]
if (all(x > 1)) {
print(x)
} else {
remove_index_2 <- c(remove_index_2, i)
print(x)
print(paste("index", i, "does not have enough observations"))
}
}
## female male
## 11 9
## no yes
## 17 1
## [1] "index 2 does not have enough observations"
## colon adenocarcinoma colon mucinous adenocarcinoma
## 19 1
## [1] "index 3 does not have enough observations"
## r0 r2
## 15 1
## [1] "index 4 does not have enough observations"
## black or african american white
## 1 8
## [1] "index 5 does not have enough observations"
## consented deceased
## 18 2
## germany united states
## 11 3
## colon adenocarcinoma colon mucinous adenocarcinoma
## 19 1
## [1] "index 8 does not have enough observations"
## no yes, history of prior malignancy
## 16 4
## no yes
## 6 1
## [1] "index 10 does not have enough observations"
## chemotherapy targeted molecular therapy
## 3 1
## [1] "index 11 does not have enough observations"
## no yes
## 3 1
## [1] "index 12 does not have enough observations"
## chemotherapy other, specify
## 3 1
## [1] "index 13 does not have enough observations"
## leucovorin oxaliplatin
## 2 2
## chemotherapy other, specify
## 2 2
## chemotherapy other, specify
## 2 1
## [1] "index 16 does not have enough observations"
## intravenous (iv) oral
## 2 1
## [1] "index 17 does not have enough observations"
## no yes
## 2 1
## [1] "index 18 does not have enough observations"
## tcga-a6-2671-d6056 tcga-a6-2682-d6912
## 1 1
## [1] "index 19 does not have enough observations"
## 4c5e40d8-6a8a-4638-8b23-2461b1dcdd79 ce77ae94-e1dc-41e4-989a-8891fd070a58
## 1 1
## [1] "index 20 does not have enough observations"
## 5 fu avastin
## 1 1
## [1] "index 21 does not have enough observations"
## 2500-5136 300-325
## 1 1
## [1] "index 22 does not have enough observations"
## adjuvant progression
## 1 1
## [1] "index 23 does not have enough observations"
## no yes
## 1 1
## [1] "index 24 does not have enough observations"
## chemotherapy targeted molecular therapy
## 1 1
## [1] "index 25 does not have enough observations"
## tcga-a6-2671-d6049 tcga-a6-2682-d6908
## 1 1
## [1] "index 26 does not have enough observations"
## 6d41c707-6c72-4ead-9e68-6677b9ae902d 891f0411-bdcc-468d-bc50-acc539d6bc11
## 1 1
## [1] "index 27 does not have enough observations"
## 5 fu leucovorin
## 1 1
## [1] "index 28 does not have enough observations"
## 2000-5000 700-735
## 1 1
## [1] "index 29 does not have enough observations"
## adjuvant palliative
## 1 1
## [1] "index 30 does not have enough observations"
## chemotherapy other, specify
## 1 1
## [1] "index 31 does not have enough observations"
## mg mg/kg
## 3 1
## [1] "index 32 does not have enough observations"
## 1 2
## 3 1
## [1] "index 33 does not have enough observations"
## mg mg/kg
## 3 1
## [1] "index 34 does not have enough observations"
## additional new tumor event scheduled follow-up submission
## 3 1
## [1] "index 35 does not have enough observations"
## complete remission/response progressive disease
## 1 1
## [1] "index 36 does not have enough observations"
## no yes
## 2 1
## [1] "index 37 does not have enough observations"
## no yes
## 1 2
## [1] "index 38 does not have enough observations"
## no yes
## 3 3
## tumor free with tumor
## 3 2
## additional new tumor event scheduled follow-up submission
## 1 1
## [1] "index 41 does not have enough observations"
## no yes
## 1 2
## [1] "index 42 does not have enough observations"
## tumor free with tumor
## 1 2
## [1] "index 43 does not have enough observations"
## complete remission/response partial remission/response
## 5 1
## [1] "index 44 does not have enough observations"
## no yes
## 1 1
## [1] "index 45 does not have enough observations"
## no yes
## 10 3
## tumor free with tumor
## 11 3
## no yes
## 13 5
## complete remission/response partial remission/response
## 5 1
## [1] "index 49 does not have enough observations"
## no yes
## 17 1
## [1] "index 50 does not have enough observations"
## alive dead
## 13 5
## female male
## 11 9
## colon adenocarcinoma colon mucinous adenocarcinoma
## 19 1
## [1] "index 53 does not have enough observations"
## no yes
## 9 8
## no yes
## 19 1
## [1] "index 55 does not have enough observations"
## 8140/3 8480/3
## 19 1
## [1] "index 56 does not have enough observations"
## no yes
## 16 2
## no yes
## 11 2
## no yes
## 11 5
## no yes
## 4 3
## tumor free with tumor
## 5 13
## no yes
## 15 5
## black or african american white
## 1 8
## [1] "index 63 does not have enough observations"
## r0 r2
## 15 1
## [1] "index 64 does not have enough observations"
## 5th 6th
## 11 6
## no yes
## 17 1
## [1] "index 66 does not have enough observations"
## no yes
## 13 7
## no yes
## 7 13
## no yes
## 13 3
## alive dead
## 17 3
## msi-h mss
## 5 14
## case matched expression control ffpe validation
## 18 2
## consented deceased
## 18 2
## germany united states
## 11 3
## colon adenocarcinoma colon mucinous adenocarcinoma
## 19 1
## [1] "index 75 does not have enough observations"
## no yes
## 11 9
## cryovial other (please specify)
## 18 2
## be127778-e160-4ae3-9e5a-13a16eae2e7a c671806f-013e-4d99-9841-cda5bd43eff1
## 1 1
## [1] "index 78 does not have enough observations"
## no yes
## 18 2
## tcga-a6-2684.9dabf713-dd0c-4388-b69e-f1bbb08e4d21.pdf
## 1
## tcga-a6-5659.16172149-a834-4f41-a786-5d87a69fc65c.pdf
## 1
## [1] "index 80 does not have enough observations"
## 16172149-a834-4f41-a786-5d87a69fc65c 9dabf713-dd0c-4388-b69e-f1bbb08e4d21
## 1 1
## [1] "index 81 does not have enough observations"
## no yes
## 5 2
## no yes
## 18 2
## repli-g (qiagen) dna rna
## 9 11
## r w
## 11 9
## no yes
## 18 2
## picogreen uv spec
## 8 1
## [1] "index 87 does not have enough observations"
## no yes
## 18 2
## no yes
## 18 2
## no yes
## 18 2
## no yes
## 18 2
## no yes
## 10 10
## no yes
## 18 2
## picogreen uv spec
## 2 18
## no yes
## 18 2
## tcga-a6-2684-01c-08-bs8 tcga-a6-5659-01b-04-bs4
## 1 1
## [1] "index 96 does not have enough observations"
## 3557f6b1-2db8-48fa-87a0-86b293ba9ffc 981d2609-a2d8-4573-b704-5711d132abcd
## 1 1
## [1] "index 97 does not have enough observations"
## tcga-a6-2684-01c-08-bs8.981d2609-a2d8-4573-b704-5711d132abcd.svs
## 1
## tcga-a6-5659-01b-04-bs4.3557f6b1-2db8-48fa-87a0-86b293ba9ffc.svs
## 1
## [1] "index 98 does not have enough observations"
## no yes
## 9 2
## 1758 a32z
## 1 7
## [1] "index 100 does not have enough observations"
## repli-g (qiagen) dna rna
## 2 9
## r w
## 9 2
## allprep rna extraction repli-g
## 9 2
## tcga-a6-2684-10a-01d-2188-10 tcga-a6-5659-10a-01d-a46w-08
## 1 1
## [1] "index 104 does not have enough observations"
## 0575ab82-6307-4b28-b46f-394309950246 205dbcc1-d726-49bd-8987-ffdfe0bd82ee
## 1 1
## [1] "index 105 does not have enough observations"
## 2188 a46w
## 1 1
## [1] "index 106 does not have enough observations"
## e h
## 1 1
## [1] "index 107 does not have enough observations"
## no yes
## 10 1
## [1] "index 108 does not have enough observations"
## adna preparation type chemical lysis dna extraction
## 9 2
## blood derived normal solid tissue normal
## 2 9
## tcga-a6-2684-11a tcga-a6-5659-11a
## 1 1
## [1] "index 111 does not have enough observations"
## 54aad2df-8ee1-474f-8d94-858cfca51970 bf2b2ca1-17b5-4a4d-bae8-46f571129706
## 1 1
## [1] "index 112 does not have enough observations"
## tcga-a6-2684-11a-01r-1757-13 tcga-a6-5659-11a-01r-1653-07
## 1 1
## [1] "index 113 does not have enough observations"
## 2897d2b0-0d34-4679-9b36-fc1385bfb5ee bdc32766-c722-43b9-946b-5204fd535f87
## 1 1
## [1] "index 114 does not have enough observations"
## c g
## 1 1
## [1] "index 115 does not have enough observations"
## tcga-a6-2684-11a-01r tcga-a6-5659-11a-01r
## 1 1
## [1] "index 116 does not have enough observations"
## 0d77fa62-02b5-46cb-974a-2d1d078e9f53 9ffa633a-c640-4cae-8e6e-2e4f9e57a85d
## 1 1
## [1] "index 117 does not have enough observations"
## tcga-a6-2684-11a-01d-1549-01 tcga-a6-5659-11a-01d-1649-01
## 1 1
## [1] "index 118 does not have enough observations"
## 2a11206d-f931-496c-aedb-0a522955d77f ee3881ea-2adb-424e-9a7b-1dd05dd93a34
## 1 1
## [1] "index 119 does not have enough observations"
## a c
## 1 1
## [1] "index 120 does not have enough observations"
## tcga-a6-2684-11a-01d-1551-05 tcga-a6-5659-11a-01d-1650-10
## 1 1
## [1] "index 121 does not have enough observations"
## 014243b3-34cd-4881-a197-6a79248cd89d b5eb58df-e753-4dda-b7d1-335de55750b9
## 1 1
## [1] "index 122 does not have enough observations"
## a c
## 1 1
## [1] "index 123 does not have enough observations"
## tcga-a6-2684-11a-01d-1554-10 tcga-a6-5659-11a-01d-1651-05
## 1 1
## [1] "index 124 does not have enough observations"
## 81311020-2894-49a1-90d0-6699b0141841 90152d16-ac20-46c2-81f7-c5ced81bba83
## 1 1
## [1] "index 125 does not have enough observations"
## c d
## 1 1
## [1] "index 126 does not have enough observations"
## tcga-a6-2684-11a-01d-1555-09 tcga-a6-5659-11a-01d-1862-23
## 1 1
## [1] "index 127 does not have enough observations"
## 83c9efe2-9bf9-432b-a8b0-45d7209e99a0 ff1bcb1b-7746-47b0-8665-c987d4df5995
## 1 1
## [1] "index 128 does not have enough observations"
## d e
## 1 1
## [1] "index 129 does not have enough observations"
## tcga-a6-2684-11a-01d-1547-02 tcga-a6-5659-11a-01d-1648-02
## 1 1
## [1] "index 130 does not have enough observations"
## 2e6beb98-208e-4cdf-ab01-73b9db5f9743 dd8601de-3687-4438-ae59-8acd37d07e32
## 1 1
## [1] "index 131 does not have enough observations"
## a c
## 1 1
## [1] "index 132 does not have enough observations"
## tcga-a6-2684-11a-01d tcga-a6-5659-11a-01d
## 1 1
## [1] "index 133 does not have enough observations"
## 1474e2bf-5cc7-47f3-93db-6ccb704f482d 83c3b52f-985c-459b-b1d2-165c38b153ca
## 1 1
## [1] "index 134 does not have enough observations"
## tcga-a6-2684-11a-01 tcga-a6-5659-11a-01
## 1 1
## [1] "index 135 does not have enough observations"
## 81181d2e-7cfa-44c6-b3f3-d08993bc1bd3 d91940bc-f960-467a-9a72-a4b0783e428f
## 1 1
## [1] "index 136 does not have enough observations"
## tcga-a6-2684-11a-01-ts1 tcga-a6-5659-11a-01-ts1
## 1 1
## [1] "index 137 does not have enough observations"
## 373c2bcf-43e7-4862-ad25-27319410fa23 d1a844c4-de66-4210-9201-8b74b14fea32
## 1 1
## [1] "index 138 does not have enough observations"
## tcga-a6-2684-11a-01-ts1.373c2bcf-43e7-4862-ad25-27319410fa23.svs
## 1
## tcga-a6-5659-11a-01-ts1.d1a844c4-de66-4210-9201-8b74b14fea32.svs
## 1
## [1] "index 139 does not have enough observations"
## tcga-a6-2684-01a-01r-a278-07 tcga-a6-5659-01a-01r-a278-07
## 1 1
## [1] "index 140 does not have enough observations"
## d517fd04-2868-4d22-a0d6-cfe81e885601 fa89193c-5cdd-4092-931e-867270bef743
## 1 1
## [1] "index 141 does not have enough observations"
## c g
## 1 1
## [1] "index 142 does not have enough observations"
## tcga-a6-2684-01a-01r-a46x-36 tcga-a6-5659-01a-01r-a46x-36
## 1 1
## [1] "index 143 does not have enough observations"
## 22fdac9c-0f50-426e-b89f-c5e7fdf7ab17 a40deaff-59e3-445e-b5ea-87608b5b449d
## 1 1
## [1] "index 144 does not have enough observations"
## c g
## 1 1
## [1] "index 145 does not have enough observations"
## h r
## 6 14
## allprep rna extraction mirvana (allprep dna) rna
## 14 6
## rna total rna
## 6 14
## r t
## 6 14
## allprep rna extraction total rna
## 6 14
## 0896 0897
## 3 2
## picogreen uv spec
## 8 12
## tcga-a6-2684-01a-01d-a274-01 tcga-aa-3518-01a-02d-1618-23
## 1 1
## [1] "index 153 does not have enough observations"
## 114e87d6-5b9c-41d7-924b-ea8a85b78798 43a50f21-31ce-462d-94ee-a2b069fcdb00
## 1 1
## [1] "index 154 does not have enough observations"
## 1618 a274
## 1 1
## [1] "index 155 does not have enough observations"
## b f
## 1 1
## [1] "index 156 does not have enough observations"
## tcga-a6-2684-01a-01d-a27a-05 tcga-aa-3518-01a-02d-1953-10
## 1 1
## [1] "index 157 does not have enough observations"
## 5edb7fd3-9802-4c1e-9956-b5fcfb900f28 8c9ff2b3-a02f-4ec1-b8c8-863e17447a23
## 1 1
## [1] "index 158 does not have enough observations"
## 1953 a27a
## 1 1
## [1] "index 159 does not have enough observations"
## no yes
## 8 12
## bottom top
## 7 13
## bottom top
## 13 7
## CIMP.H Cluster3
## 2 3
## Invasive MSI/CIMP
## 2 3
## M0 M1
## 7 2
## FEMALE MALE
## 7 2
## Colon Adenocarcinoma Colon Mucinous Adenocarcinoma
## 8 1
## [1] "index 167 does not have enough observations"
## NO YES
## 3 5
## 8140/3 8480/3
## 8 1
## [1] "index 169 does not have enough observations"
## NO YES
## 6 3
## TUMOR FREE WITH TUMOR
## 3 6
## NO YES
## 6 3
## NO YES
## 7 2
## DECEASED LIVING
## 1 8
## [1] "index 174 does not have enough observations"
binary_factor_norm_2 <- binary_factor_norm[,-remove_index_2]
wilcox_test_res_2 <- as.data.frame(matrix(nrow = ncol(binary_factor_norm_2),
ncol = ncol(sampleScore_sub_2)))
rownames(wilcox_test_res_2) <- colnames(binary_factor_norm_2)
colnames(wilcox_test_res_2) <- colnames(sampleScore_sub_2)
wtest_coad_wvalue_norm <- wilcox_test_res_2
wtest_coad_pvalue_norm <- wilcox_test_res_2
for (i in seq_len(ncol(sampleScore_sub_2))) {
for (j in seq_len(ncol(binary_factor_norm_2))) {
## wilcoxon test
wilcox_test <- wilcox.test(sampleScore_sub_2[, i] ~ binary_factor_norm_2[,j], alternative="two.sided")
## W value
wval <- wilcox_test$statistic
wtest_coad_wvalue_norm[j, i] <- wval
## p-value
pval <- wilcox_test$p.value
wtest_coad_pvalue_norm[j, i] <- pval
}
}
batch_char_ind <- grep('analyte|analytes|portion|procurement|aliquot|uuid|barcode',
rownames(wtest_coad_wvalue_norm))
wtest_coad_wvalue_norm_2 <- wtest_coad_wvalue_norm[-batch_char_ind,]
wtest_coad_pvalue_norm_2 <- wtest_coad_pvalue_norm[-batch_char_ind,]
heatmap(as.matrix(wtest_coad_wvalue_2), main = 'COAD Wilcoxon Test Cancer Binomial')
heatmap(as.matrix(wtest_coad_wvalue_norm_2), main = 'COAD Wilcoxon Test Normal Binomial')
#Only view w-values with significant p-value (<0.01)
# Cancer
wtest_coad_sig_wvalue <- wilcox_test_res
for (i in seq_len(ncol(sampleScore_sub))) {
for (j in seq_len(ncol(binary_factor_2))) {
if (wilcox.test(sampleScore_sub[, i] ~ binary_factor_2[,j], alternative="two.sided")$p.val < 0.01) {
wtest_coad_sig_wvalue[j, i] <- wilcox.test(sampleScore_sub[, i] ~ binary_factor_2[,j],
alternative="two.sided")$statistic
}
}
}
na_ind <- apply(wtest_coad_sig_wvalue, 1, function(x) all(is.na(x)))
wtest_coad_sig_wvalue <- wtest_coad_sig_wvalue[!na_ind, ]
batch_char_ind_2 <- grep('analyte|analytes|portion|procurement|aliquot|uuid|barcode',
rownames(wtest_coad_sig_wvalue))
wtest_coad_sig_wvalue <- wtest_coad_sig_wvalue[-batch_char_ind_2, ]
# Normal
wtest_coad_sig_wvalue_2 <- wilcox_test_res_2
for (i in seq_len(ncol(sampleScore_sub_2))) {
for (j in seq_len(ncol(binary_factor_norm_2))) {
if (wilcox.test(sampleScore_sub_2[, i] ~ binary_factor_norm_2[,j], alternative="two.sided")$p.val < 0.1) {
wtest_coad_sig_wvalue_2[j, i] <- wilcox.test(sampleScore_sub_2[, i] ~ binary_factor_norm_2[,j],
alternative="two.sided")$statistic
}
}
}
na_ind <- apply(wtest_coad_sig_wvalue_2, 1, function(x) all(is.na(x)))
wtest_coad_sig_wvalue_2 <- wtest_coad_sig_wvalue_2[!na_ind, ]
batch_char_ind_2 <- grep('analyte|analytes|portion|procurement|aliquot|uuid|barcode',
rownames(wtest_coad_sig_wvalue_2))
wtest_coad_sig_wvalue_2 <- wtest_coad_sig_wvalue_2[-batch_char_ind_2, ]
# Cancer
options(ztable.type='html')
ztable(wtest_coad_sig_wvalue) %>%
makeHeatmap(palette = 'Blues') %>%
print(caption='Cancer Sample W-test with p-values < 0.01')
 | RAV188 | RAV1575 | RAV324 | RAV832 | RAV61 | RAV1008 | RAV868 | RAV64 | RAV220 | RAV834 | RAV833 | RAV438 | RAV190 | RAV1166 | RAV192 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
patient.braf_gene_analysis_performed | 1094.00 | 362.00 | NA | 1086.00 | 349.00 | 333.00 | 1121.00 | 1142.00 | 352.00 | 347.00 | |||||
#Normal
# NONE
## Function to Display Categorical Attributes' Score Plots
# Cancer
graph_categorical <- function(a, b, c, d = 'Legend') {
sampleScore1 <- paste0('RAV', a)
sampleScore2 <- paste0('RAV', b)
plot_data <- sampleScore_sub[, c(sampleScore1, sampleScore2)]
colnames(plot_data)[1] <- "sampleScore1"
colnames(plot_data)[2] <- "sampleScore2"
plot_data <- data.frame(plot_data, binary_factor_2)
plot_data <- plot_data[which(!is.na(factor(plot_data[[c]]))),]
# print(plot_data)
colors <- gg_color_hue(length(unique(plot_data[[c]])))
colors.toplot <- c(colors)
pA <- ggplot(plot_data,
aes(x = sampleScore1, y = sampleScore2, color = plot_data[[c]]
)) +
geom_point() +
#ggtitle('Score Plot for Gender') +
scale_color_manual(values = colors.toplot, name = d) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_hline(yintercept = 0, linetype = 'dashed') +
geom_vline(xintercept = 0, linetype = 'dashed') +
xlab(sampleScore1) + ylab(sampleScore2)
print(pA)
}
graph_categorical(834, 1166, 'patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status', 'Microsatellite Status')
graph_categorical(834, 188, 'patient.braf_gene_analysis_result', 'BRAF Gene Analysis Result')
## Warning: Use of `plot_data[[c]]` is discouraged.
## ℹ Use `.data[[c]]` instead.
# Normal
graph_categorical_2 <- function(a, b, c, d = 'Legend') {
sampleScore1 <- paste0('RAV', a)
sampleScore2 <- paste0('RAV', b)
plot_data <- sampleScore_sub_2[, c(sampleScore1, sampleScore2)]
colnames(plot_data)[1] <- "sampleScore1"
colnames(plot_data)[2] <- "sampleScore2"
plot_data <- data.frame(plot_data, binary_factor_norm_2)
plot_data <- plot_data[which(!is.na(factor(plot_data[[c]]))),]
# print(plot_data)
colors <- gg_color_hue(length(unique(plot_data[[c]])))
colors.toplot <- c(colors)
pA <- ggplot(plot_data,
aes(x = sampleScore1, y = sampleScore2, color = plot_data[[c]]
)) +
geom_point() +
#ggtitle('Score Plot for Gender') +
scale_color_manual(values = colors.toplot, name = d) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_hline(yintercept = 0, linetype = 'dashed') +
geom_vline(xintercept = 0, linetype = 'dashed') +
xlab(sampleScore1) + ylab(sampleScore2)
print(pA)
}
graph_categorical_2(833, 324, 'patient.microsatellite_instability_test_results.microsatellite_instability_test_result.mononucleotide_and_dinucleotide_marker_panel_analysis_status', 'Microsatellite Status')
## Warning: Use of `plot_data[[c]]` is discouraged.
## ℹ Use `.data[[c]]` instead.