Analyzing each dataset
library(DT)
library(UpSetR)
# Location of the five-dataset PBMC results and the dataset identifiers that
# are used as column suffixes in the summary table.
path_with_WMA <- '/Users/korshe/Documents/Data_Groningen/pbmc_5ds/'
dataset_ids <- c('stemi_v2', 'stemi_v3', 'v2_1m', 'v3_1m', 'ng')
tab_eqtls <- read.table(
  paste0(path_with_WMA, 'Sample.size._tab.tsv'),
  sep = '\t', header = TRUE
)
tab <- tab_eqtls

# For each dataset: sort the table by that dataset's raw p-value, then store
# the BH- and Bonferroni-adjusted p-values in `BH_<id>` and `Bonf_<id>`.
for (dataset_id in dataset_ids) {
  p_col <- paste0('p_value_', dataset_id)
  tab <- tab[order(tab[, p_col], decreasing = FALSE), ]
  # `adjp` holds the raw p-values in column 1 followed by one column per
  # requested procedure, with rows in increasing raw-p order (see the multtest
  # documentation) - hence the sort above so that rows line up with `tab`.
  adjusted <- multtest::mt.rawp2adjp(
    rawp = tab[, p_col],
    proc = c("BH", "Bonferroni")
  )$adjp
  tab[, paste0('BH_', dataset_id)] <- adjusted[, 2]
  tab[, paste0('Bonf_', dataset_id)] <- adjusted[, 3]
}
# Collect, per dataset, the identifiers whose adjusted p-value is below `alpha`.
#
# Args:
#   tab:    data frame holding `<prefix>_<dataset id>` adjusted p-value columns.
#   prefix: 'BH' or 'Bonf'.
#   id_col: identifier column to compare between datasets
#           ('snp_gene' for eQTLs, 'feature_id' for eGenes).
#   alpha:  significance threshold.
# Returns: a named list (one element per dataset) of significant identifiers.
significant_sets <- function(tab, prefix, id_col, alpha = 0.05) {
  # Set label -> dataset id. The weighted meta-analysis columns (suffixes
  # 'ZW_ss' and 'ZW_SE') were commented out in the original calls and stay
  # excluded here.
  set_suffixes <- c(
    NG = 'ng',
    STEMI_v2 = 'stemi_v2',
    STEMI_v3 = 'stemi_v3',
    V2_1M = 'v2_1m',
    V3_1M = 'v3_1m'
  )
  lapply(set_suffixes, function(suffix) {
    adjusted_p <- tab[[paste0(prefix, '_', suffix)]]
    # which() drops NA adjusted p-values. Plain logical indexing would turn
    # them into NA identifiers, which fromList() then counts as an element
    # shared by every dataset that has a missing p-value.
    tab[[id_col]][which(adjusted_p < alpha)]
  })
}

# Draw one UpSet plot of the significant identifiers shared between datasets.
# `label` is used as the title of the set-size axis.
plot_significant_upset <- function(tab, prefix, id_col, label) {
  UpSetR::upset(
    UpSetR::fromList(significant_sets(tab, prefix, id_col)),
    set_size.show = TRUE, order.by = 'freq', nintersects = 60, nsets = 20,
    sets.x.label = label
  )
}

plot_significant_upset(tab, 'BH', 'snp_gene', 'BH eQTLs')

plot_significant_upset(tab, 'BH', 'feature_id', 'BH eGenes')

plot_significant_upset(tab, 'Bonf', 'snp_gene', 'Bonf eQTLs')

plot_significant_upset(tab, 'Bonf', 'feature_id', 'Bonf eGenes')
## Expression parameters
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
param <- 'mean.donor'
summary_stat_filteres <- tab

# Plot how one expression parameter compares across the five datasets.
#
# Args:
#   summary_stat_filteres: data frame with a `snp_gene` column and one column
#     per dataset named `<param>_<dataset id>`.
#   param: parameter prefix, e.g. 'mean.donor' or 'sd_prop'.
#   n_sample: number of rows drawn at random for plotting; capped at the
#     number of available rows (the original hard-coded 1500 and failed on
#     smaller tables).
#   density_limits: x-axis limits of the density panel; values outside are
#     dropped by ggplot2 with a warning.
#
# Side effects: prints `param` and draws a two-panel figure (densities per
# dataset; per-dataset line plots of the values sorted by the
# van_der_Wijst_v2 column) via grid.arrange(). Sampling uses the current RNG
# state, so call set.seed() beforehand for a reproducible figure.
get_plot_for_values <- function(summary_stat_filteres, param, n_sample = 1500,
                                density_limits = c(0, 6)) {
  # Dataset id (column suffix) -> label shown in the plots. The order fixes the
  # order of legend entries and facets.
  dataset_labels <- c(
    stemi_v2 = 'van_Blokland_v2',
    stemi_v3 = 'van_Blokland_v3',
    v2_1m = 'Oelen_v2',
    v3_1m = 'Oelen_v3',
    ng = 'van_der_Wijst_v2'
  )
  param_cols <- paste0(param, '_', names(dataset_labels))
  needed_cols <- c(param_cols, 'snp_gene')
  missing_cols <- setdiff(needed_cols, colnames(summary_stat_filteres))
  if (length(missing_cols) > 0) {
    stop("Missing columns: ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # Rename by position instead of regex substitution: `param` contains '.',
  # which gsub() would have treated as a wildcard.
  values <- as.data.frame(summary_stat_filteres)[, needed_cols]
  colnames(values) <- c(unname(dataset_labels), 'snp_gene')

  # Random subset to keep the plots light.
  n_rows <- nrow(values)
  values <- values[sample.int(n_rows, min(n_sample, n_rows)), ]

  # Rank the sampled rows by their value in the van_der_Wijst_v2 dataset; the
  # rank is the shared x-axis of the line plots.
  values <- values[order(as.numeric(values$van_der_Wijst_v2)), ]
  values$gene <- seq_len(nrow(values))

  # The dataset columns are the first five columns by construction.
  data_long <- gather(values, condition, measurement,
                      seq_along(dataset_labels), factor_key = TRUE)
  data_long$measurement <- as.numeric(data_long$measurement)
  print(param)

  faceted_lines <- ggplot(
    data = data_long,
    mapping = aes(x = gene, y = measurement, color = condition)
  ) +
    theme_light() +
    geom_line() +
    facet_grid(rows = vars(condition)) +
    ggtitle(param)
  density_plot <- ggplot(data_long, aes(x = measurement, color = condition)) +
    theme_light() +
    geom_density(alpha = .3) +
    scale_x_continuous(name = param, limits = density_limits)

  grid.arrange(density_plot, faceted_lines, ncol = 2)
}
get_plot_for_values(summary_stat_filteres,param='mean.donor')
## [1] "mean.donor"

get_plot_for_values(summary_stat_filteres,param='mean.cell')
## [1] "mean.cell"

get_plot_for_values(summary_stat_filteres,param='sd.donor')
## [1] "sd.donor"

get_plot_for_values(summary_stat_filteres,param='sd.cell')
## [1] "sd.cell"

get_plot_for_values(summary_stat_filteres,param='sd_prop')
## [1] "sd_prop"
## Warning: Removed 37 rows containing non-finite values (stat_density).

WMA efficiency estimate 5ds
# Read the list of weighting ids and show them (column `x`).
weights <- read.table(
  paste0(path_with_WMA, 'list_of_weighting_ids.tsv'),
  sep = '\t', header = TRUE
)
weights$x
## [1] "mean.donor" "sd.donor"
## [3] "variance.donor" "cv.donor"
## [5] "mean_log.donor" "sd_log.donor"
## [7] "variance_log.donor" "cv_log.donor"
## [9] "mean.cell" "sd.cell"
## [11] "variance.cell" "cv.cell"
## [13] "zeros.cell" "mean_log.cell"
## [15] "sd_log.cell" "variance_log.cell"
## [17] "cv_log.cell" "zeros_log.cell"
## [19] "avr_SD_donor" "inv_variance.cell"
## [21] "inv_variance.donor" "mean_donor_L_varience_donor"
## [23] "mean_cell_L_varience_cell" "mean_donor_L_variance_cell"
## [25] "mean_cell_L_variance_donor" "inv_sd.cell"
## [27] "inv_sd.donor" "mean_donor_L_sd_donor"
## [29] "mean_cell_L_sd_cell" "mean_donor_L_sd_cell"
## [31] "mean_cell_L_sd_donor" "sd_prop"
## [33] "mean_cell_sd_prop" "mean_cell_L_sd_prop"
## [35] "mean_donor_sd_prop" "mean_donor_L_sd_prop"
## [37] "sd_prop_L_mean_cell"
# Load the dataset specification sheet and render it as an interactive table
# with copy/export buttons.
dataset_characteristics <- read.table(
  paste0(path_with_WMA, 'PBMC_dataset_specifications.csv'),
  sep = ',', header = TRUE
)
datatable(
  dataset_characteristics,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)
Testing whether the differences between summary statistics are
significant
t.test(tab$OverallZScore_stemi_v3,tab$OverallZScore_stemi_v2,alternative="two.sided", conf.level=0.95)
##
## Welch Two Sample t-test
##
## data: tab$OverallZScore_stemi_v3 and tab$OverallZScore_stemi_v2
## t = 6.4791, df = 468306, p-value = 9.235e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01462705 0.02731466
## sample estimates:
## mean of x mean of y
## 0.019233233 -0.001737625
t.test(tab$OverallZScore_v2_1m,tab$OverallZScore_v3_1m,alternative="two.sided", conf.level=0.95)
##
## Welch Two Sample t-test
##
## data: tab$OverallZScore_v2_1m and tab$OverallZScore_v3_1m
## t = -0.026983, df = 468730, p-value = 0.9785
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006931284 0.006743032
## sample estimates:
## mean of x mean of y
## -0.007809981 -0.007715855
# Compare the v2_1m Z-scores with those of the `ng` dataset. The previous call
# passed `tab$ng`, which is not a column (NULL), so t.test() silently ran a
# one-sample test against 0 - the rendered output below still shows that run.
# NOTE(review): assumes the column is named `OverallZScore_ng`, following the
# naming of the other datasets - confirm against the table header.
t.test(tab$OverallZScore_v2_1m, tab$OverallZScore_ng, alternative = "two.sided", conf.level = 0.95)
##
## One Sample t-test
##
## data: tab$OverallZScore_v2_1m
## t = -3.1317, df = 234478, p-value = 0.001738
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.012697905 -0.002922057
## sample estimates:
## mean of x
## -0.007809981
# Compare the v3_1m Z-scores with those of the `ng` dataset. The previous call
# passed `tab$ng`, which is not a column (NULL), so t.test() silently ran a
# one-sample test against 0 - the rendered output below still shows that run.
# NOTE(review): assumes the column is named `OverallZScore_ng`, following the
# naming of the other datasets - confirm against the table header.
t.test(tab$OverallZScore_v3_1m, tab$OverallZScore_ng, alternative = "two.sided", conf.level = 0.95)
##
## One Sample t-test
##
## data: tab$OverallZScore_v3_1m
## t = -3.1633, df = 234483, p-value = 0.00156
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.012496563 -0.002935147
## sample estimates:
## mean of x
## -0.007715855
t.test(tab$OverallZScore_v2_1m,tab$OverallZScore_stemi_v2,alternative="two.sided", conf.level=0.95)
##
## Welch Two Sample t-test
##
## data: tab$OverallZScore_v2_1m and tab$OverallZScore_stemi_v2
## t = -1.8087, df = 463958, p-value = 0.07049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0126524541 0.0005077419
## sample estimates:
## mean of x mean of y
## -0.007809981 -0.001737625
t.test(tab$OverallZScore_v3_1m,tab$OverallZScore_stemi_v3,alternative="two.sided", conf.level=0.95)
##
## Welch Two Sample t-test
##
## data: tab$OverallZScore_v3_1m and tab$OverallZScore_stemi_v3
## t = -7.9908, df = 467934, p-value = 1.344e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03355915 -0.02033903
## sample estimates:
## mean of x mean of y
## -0.007715855 0.019233233
Weights and weighting characteristics
Check which weighting is the most optimal by number of eQTLs:
BH-corrected
library(DT)
path_with_WMA <- '/Users/korshe/Documents/Data_Groningen/pbmc_5ds/'

# Load the BH summary, sort rows by the `eQTLs` column (ascending) and move
# columns 10 and 11 to the front before rendering the interactive table.
BH <- read.table(paste0(path_with_WMA, 'BH', '.tsv'), sep = '\t', header = TRUE)
BH <- BH[order(BH$eQTLs), c(10, 11, 1:9)]
DT::datatable(
  BH,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)
Check which weighting is the most optimal by number of eQTLs:
Bonferroni-corrected
# Load the Bonferroni summary, sort rows by the `eQTLs` column (ascending) and
# move columns 10 and 11 to the front before rendering the interactive table.
Bonf <- read.table(paste0(path_with_WMA, 'Bonf', '.tsv'), sep = '\t', header = TRUE)
Bonf <- Bonf[order(Bonf$eQTLs), c(10, 11, 1:9)]
DT::datatable(
  Bonf,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)
library(viridis)
## Loading required package: viridisLite
# Keep only the rows that have no missing value in any column.
tab <- tab[complete.cases(tab), , drop = FALSE]
# Estimate the 2D kernel density of the points (x, y) and return, for every
# point, the density of the grid cell it falls into.
#
# Args:
#   x, y: numeric vectors of equal length.
#   ...:  forwarded to MASS::kde2d() (e.g. `n`, the grid size).
# Returns: numeric vector with one density value per input point.
get_density <- function(x, y, ...) {
  kde <- MASS::kde2d(x, y, ...)
  # Row/column index of the grid cell containing each point.
  grid_cell <- cbind(findInterval(x, kde$x), findInterval(y, kde$y))
  kde$z[grid_cell]
}
# Point density of the (weighted Z-score, eQTLGen Z-score) cloud, used to
# colour the scatter plot.
tab$density <- get_density(tab$ZW_weight_ss, tab$OverallZScore.eqtlgen_ng, n = 105)
# Columns are referenced by bare name inside aes(); `tab$...` inside aes() is
# what triggered the "Use of `tab$...` is discouraged" warnings.
ggplot(tab) +
  geom_point(aes(ZW_weight_ss, OverallZScore.eqtlgen_ng, color = density)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "grey")
  ) +
  labs(y = "ZScore from eQTLGen", x = "Weighted ZScore (SS)") +
  scale_color_viridis()
## Warning: Use of `tab$ZW_weight_ss` is discouraged. Use `ZW_weight_ss` instead.
## Warning: Use of `tab$OverallZScore.eqtlgen_ng` is discouraged. Use
## `OverallZScore.eqtlgen_ng` instead.

# The colour must reflect the density of the points actually plotted here
# (SE-weighted Z-score vs eQTLGen); previously the density computed for the
# SS-weighted coordinates was reused.
tab$density_SE <- get_density(tab$ZW_weight_SE, tab$OverallZScore.eqtlgen_ng, n = 105)
# Columns are referenced by bare name inside aes(); `tab$...` inside aes() is
# what triggered the "Use of `tab$...` is discouraged" warnings.
ggplot(tab) +
  geom_point(aes(ZW_weight_SE, OverallZScore.eqtlgen_ng, color = density_SE)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "grey")
  ) +
  labs(y = "ZScore from eQTLGen", x = "Weighted ZScore (SE)", color = "density") +
  scale_color_viridis()
## Warning: Use of `tab$ZW_weight_SE` is discouraged. Use `ZW_weight_SE` instead.
## Use of `tab$OverallZScore.eqtlgen_ng` is discouraged. Use `OverallZScore.eqtlgen_ng` instead.

WMA estimate for pairwise weighting
path_wma_pairwise <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/ALL/'
datset_combination <- 'ng_vs_stemi_v2'

# Load the pairwise WMA summary `ALL_<datset_combination>.tsv` and rank its rows.
#
# Args:
#   path_wma_pairwise: directory containing the summary files, with a
#     trailing slash.
#   datset_combination: name of the dataset pair, e.g. 'ng_vs_stemi_v2'.
# Returns: the table sorted by `Bonf_genes`, then `Bonf_TP` (both
#   descending), with two added columns: `rank_genes` (1 = best row) and
#   `weight` (the `ss` column joined by '_' to the row name with all digits
#   removed).
tab_prep <- function(path_wma_pairwise, datset_combination) {
  pairwise <- read.table(
    paste0(path_wma_pairwise, 'ALL_', datset_combination, '.tsv'),
    sep = '\t', header = TRUE
  )
  best_first <- order(pairwise$Bonf_genes, pairwise$Bonf_TP, decreasing = TRUE)
  pairwise <- pairwise[best_first, ]
  # seq_len() keeps an empty table valid; 1:nrow() would yield c(1, 0) there.
  pairwise$rank_genes <- seq_len(nrow(pairwise))
  pairwise$weight <- paste(
    pairwise$ss,
    gsub('[0-9]+', '', rownames(pairwise)),
    sep = '_'
  )
  pairwise
}
Within chemistries
# Ranked summary for each dataset pair; only columns 1, 2, 4, 6, 10 and 11 are
# shown in the interactive tables.
ng_vs_stemi_v2 <- tab_prep(path_wma_pairwise, datset_combination)
datatable(
  ng_vs_stemi_v2[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
ng_vs_v2_1m <- tab_prep(path_wma_pairwise, datset_combination = 'ng_vs_v2_1m')
datatable(
  ng_vs_v2_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
stemi_v3_vs_v3_1m <- tab_prep(path_wma_pairwise, datset_combination = 'stemi_v3_vs_v3_1m')
datatable(
  stemi_v3_vs_v3_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
#library(matrixStats)
# Aggregate the per-comparison ranks of every weight: keep weights present in
# all three comparisons, sum their ranks, and list the lowest total first.
inter_chemistries_agr_rank <- local({
  comparisons <- list(
    ng_vs_stemi_v2 = ng_vs_stemi_v2,
    ng_vs_v2_1m = ng_vs_v2_1m,
    stemi_v3_vs_v3_1m = stemi_v3_vs_v3_1m
  )
  # Name each rank column after its comparison instead of relying on merge()'s
  # `.x` / `.y` suffixes, which do not say where a rank came from.
  rank_tables <- Map(function(pairwise, label) {
    ranks <- pairwise[, c('weight', 'rank_genes')]
    colnames(ranks)[2] <- paste0('rank_', label)
    ranks
  }, comparisons, names(comparisons))
  merged <- Reduce(function(left, right) merge(left, right, by = 'weight'),
                   rank_tables)
  # fixed = TRUE: '.tsv' is a literal suffix, not a regular expression.
  merged$weight <- gsub('.tsv', '', merged$weight, fixed = TRUE)
  rownames(merged) <- merged$weight
  merged$weight <- NULL
  merged$agregated_rank <- rowSums(merged)
  merged[order(merged$agregated_rank), ]
})
datatable(
  inter_chemistries_agr_rank,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)
Between chemistries
# Ranked summary for each dataset pair; only columns 1, 2, 4, 6, 10 and 11 are
# shown in the interactive tables.
stemi_v3_vs_stemi_v2 <- tab_prep(path_wma_pairwise, datset_combination = 'stemi_v3_vs_stemi_v2')
datatable(
  stemi_v3_vs_stemi_v2[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
ng_vs_v3_1m <- tab_prep(path_wma_pairwise, datset_combination = 'ng_vs_v3_1m')
datatable(
  ng_vs_v3_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
v3_1m_vs_v2_1m <- tab_prep(path_wma_pairwise, datset_combination = 'v3_1m_vs_v2_1m')
datatable(
  v3_1m_vs_v2_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)
Getting aggregated rank
#library(matrixStats)
# Aggregate the per-comparison ranks of every weight: keep weights present in
# all three comparisons, sum their ranks, and list the lowest total first.
between_chemistries_agr_rank <- local({
  comparisons <- list(
    v3_1m_vs_v2_1m = v3_1m_vs_v2_1m,
    ng_vs_v3_1m = ng_vs_v3_1m,
    stemi_v3_vs_stemi_v2 = stemi_v3_vs_stemi_v2
  )
  # Name each rank column after its comparison instead of relying on merge()'s
  # `.x` / `.y` suffixes, which do not say where a rank came from.
  rank_tables <- Map(function(pairwise, label) {
    ranks <- pairwise[, c('weight', 'rank_genes')]
    colnames(ranks)[2] <- paste0('rank_', label)
    ranks
  }, comparisons, names(comparisons))
  merged <- Reduce(function(left, right) merge(left, right, by = 'weight'),
                   rank_tables)
  # fixed = TRUE: '.tsv' is a literal suffix, not a regular expression.
  merged$weight <- gsub('.tsv', '', merged$weight, fixed = TRUE)
  rownames(merged) <- merged$weight
  merged$weight <- NULL
  merged$agregated_rank <- rowSums(merged)
  merged[order(merged$agregated_rank), ]
})
datatable(
  between_chemistries_agr_rank,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)
Grid search
functions
dataset_combination <- 'ng_vs_stemi_v2'
grid_path <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/grid_search/'

# Draw a heatmap of the grid-search summary for one dataset pair.
#
# Reads `Grid_summary_<dataset_combination>.tsv` from `grid_path`, drops the
# `param` column, keeps the first row for every distinct `grid` value, and
# plots the remaining columns as heatmap rows against the grid values as
# heatmap columns (no clustering, values scaled per heatmap column).
# Returns the value of heatmap() invisibly.
heatmap_fun <- function(grid_path, dataset_combination) {
  summary_file <- paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv')
  grid_tab <- read.table(summary_file, sep = '\t', header = TRUE)
  grid_tab$param <- NULL

  # One row per grid value, in increasing grid order.
  grid_tab <- grid_tab[order(grid_tab$grid), ]
  grid_tab <- grid_tab[!duplicated(grid_tab$grid), ]
  rownames(grid_tab) <- paste(grid_tab$grid)

  # Everything except the grid value itself is plotted.
  plotted <- grid_tab[, colnames(grid_tab) != 'grid', drop = FALSE]
  invisible(
    heatmap(
      t(as.matrix(plotted)),
      Colv = NA, Rowv = NA, scale = "column",
      main = paste(dataset_combination, 'Grid search')
    )
  )
}
library(knitr)
# Print the best grid-search settings for one dataset pair as two tables.
#
# Reads `Grid_summary_<dataset_combination>.tsv` from `grid_path` and prints
#   1. the five grid values with the highest `SS` count, and
#   2. the six (grid value, weight) combinations with the highest count over
#      all weight columns.
#
# Args:
#   grid_path: directory containing the grid summaries, with a trailing slash.
#   dataset_combination: name of the dataset pair, e.g. 'ng_vs_stemi_v2'.
show_most_optimal_weight <- function(grid_path, dataset_combination) {
  grid_tab <- read.table(
    paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv'),
    sep = '\t', header = TRUE
  )
  grid_tab$param <- NULL
  grid_tab <- grid_tab[order(grid_tab$SS, decreasing = TRUE), ]
  # head() instead of [1:5, ]: no NA padding when fewer than five rows exist.
  SS_grid <- head(grid_tab[, c('grid', 'SS')], 5)

  # Gather every column except `grid`, rather than a hard-coded 1:38, so the
  # reshape keeps working when the number of weights changes.
  grid_tab_long <- gather(grid_tab, weight, neQTLs, -grid, factor_key = TRUE)
  grid_tab_long <- grid_tab_long[order(grid_tab_long$neQTLs, decreasing = TRUE), ]

  # The caption belongs to kable(); passed to print() it was silently ignored.
  print(kable(SS_grid, caption = paste(dataset_combination, 'SS-based WMA')))
  print(kable(head(grid_tab_long),
              caption = paste(dataset_combination, 'Top grid with other parameters')))
}
#show_most_optimal_weight(grid_path, dataset_combination)
library(dplyr)
library(ggplot2)
library(tidyr)
# Line plot of eQTL counts along the grid for the best weights and for SS.
#
# Reads `Grid_summary_<dataset_combination>.tsv` from `grid_path`, keeps the
# first row per `grid` value, picks the `n_top` weight columns with the
# highest mean count over the whole grid, and plots them together with the
# `SS` column for grid values below `max_grid`.
#
# Args:
#   grid_path: directory containing the grid summaries, with a trailing slash.
#   dataset_combination: name of the dataset pair, e.g. 'ng_vs_stemi_v2'.
#   max_grid: only grid values strictly below this are plotted.
#   n_top: number of weight columns shown next to SS.
# Returns: the ggplot object.
line_fun <- function(grid_path, dataset_combination, max_grid = 10, n_top = 5) {
  grid_tab <- read.table(
    paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv'),
    sep = '\t', header = TRUE
  )
  grid_tab$param <- NULL
  grid_tab <- grid_tab[order(grid_tab$grid), ]
  grid_tab <- grid_tab[!duplicated(grid_tab$grid), ]
  rownames(grid_tab) <- paste(grid_tab$grid)

  # Rank only the candidate weights. Previously `SS` and `grid` took part in
  # the ranking; whenever one of them reached the top, fewer than seven
  # columns were kept and the positional gather swallowed the `grid` column.
  weight_cols <- setdiff(colnames(grid_tab), c('SS', 'grid'))
  weight_means <- colMeans(grid_tab[, weight_cols, drop = FALSE])
  top_weights <- head(names(weight_means)[order(weight_means, decreasing = TRUE)],
                      n_top)

  keep_cols <- colnames(grid_tab) %in% c(top_weights, 'SS', 'grid')
  top_tab <- grid_tab[grid_tab$grid < max_grid, keep_cols, drop = FALSE]
  top_long <- gather(top_tab, weight, neQTLs, -grid, factor_key = TRUE)

  ggplot(top_long, aes(x = grid, y = neQTLs, group = weight, color = weight)) +
    geom_line() +
    theme_light() +
    ggtitle(paste(dataset_combination, "top 5 weights and Sample size"))
}
results
dataset_combination = 'ng_vs_stemi_v2'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|---------:|---:|
## |3 | 0.0222222| 147|
## |4 | 0.0285714| 147|
## |5 | 0.0400000| 147|
## |2 | 0.0181818| 146|
## |6 | 0.0666667| 120|
##
##
## | | grid|weight | neQTLs|
## |:---|---------:|:-----------------------|------:|
## |274 | 0.1250000|grid_variance.donor | 149|
## |275 | 0.1333333|grid_variance.donor | 149|
## |276 | 0.1428571|grid_variance.donor | 149|
## |277 | 0.1538462|grid_variance.donor | 149|
## |626 | 0.1250000|grid_variance_log.donor | 149|
## |627 | 0.1333333|grid_variance_log.donor | 149|
line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_stemi_v3'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|---------:|---:|
## |2 | 0.0181818| 160|
## |3 | 0.0222222| 160|
## |7 | 0.1000000| 145|
## |8 | 0.1052632| 145|
## |9 | 0.1111111| 145|
##
##
## | | grid|weight | neQTLs|
## |:----|---------:|:-----------------------|------:|
## |1872 | 0.2083333|grid_inv_variance.donor | 183|
## |1873 | 0.2173913|grid_inv_variance.donor | 183|
## |1874 | 0.2272727|grid_inv_variance.donor | 183|
## |1875 | 0.2380952|grid_inv_variance.donor | 183|
## |1861 | 0.2500000|grid_inv_variance.donor | 182|
## |1869 | 0.1818182|grid_inv_variance.donor | 182|
line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_v2_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|----:|---:|
## |53 | 2.0| 378|
## |47 | 1.4| 375|
## |55 | 2.4| 375|
## |56 | 2.6| 375|
## |54 | 2.2| 374|
##
##
## | | grid|weight | neQTLs|
## |:----|----:|:------------------------|------:|
## |308 | 15.0|grid_variance.donor | 389|
## |660 | 15.0|grid_variance_log.donor | 389|
## |2560 | 3.0|grid_mean_cell_L_sd_cell | 389|
## |103 | 3.6|grid_mean.donor | 388|
## |104 | 3.8|grid_mean.donor | 388|
## |108 | 4.0|grid_mean.donor | 388|
line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_v3_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|---------:|---:|
## |40 | 0.7692308| 398|
## |41 | 0.8333333| 398|
## |46 | 1.3000000| 393|
## |47 | 1.4000000| 392|
## |48 | 1.5000000| 392|
##
##
## | | grid|weight | neQTLs|
## |:----|----:|:-------------------------------|------:|
## |2115 | 1.3|grid_mean_donor_L_variance_cell | 400|
## |2119 | 1.1|grid_mean_donor_L_variance_cell | 400|
## |2120 | 1.2|grid_mean_donor_L_variance_cell | 400|
## |708 | 1.4|grid_cv_log.donor | 399|
## |711 | 1.1|grid_cv_log.donor | 399|
## |712 | 1.2|grid_cv_log.donor | 399|
line_fun(grid_path,dataset_combination)

dataset_combination = 'v3_1m_vs_stemi_v2'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|---------:|---:|
## |38 | 0.6666667| 135|
## |39 | 0.7142857| 135|
## |40 | 0.7692308| 134|
## |42 | 0.9090909| 133|
## |41 | 0.8333333| 132|
##
##
## | | grid|weight | neQTLs|
## |:----|---------:|:------------------------|------:|
## |409 | 0.1250000|grid_cv.donor | 137|
## |431 | 0.1176471|grid_cv.donor | 137|
## |3257 | 0.6666667|grid_sd_prop_L_mean_cell | 136|
## |1 | 0.6666667|SS | 135|
## |2 | 0.7142857|SS | 135|
## |430 | 0.1111111|grid_cv.donor | 135|
dataset_combination = 'v3_1m_vs_stemi_v3'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|---------:|--:|
## |2 | 0.0181818| 87|
## |3 | 0.0222222| 87|
## |4 | 0.0285714| 87|
## |5 | 0.0400000| 87|
## |6 | 0.0666667| 87|
##
##
## | | grid|weight | neQTLs|
## |:----|---------:|:----------------------|------:|
## |92 | 0.0400000|grid_mean.donor | 114|
## |179 | 0.0285714|grid_sd.donor | 114|
## |444 | 0.0400000|grid_mean_log.donor | 114|
## |531 | 0.0285714|grid_sd_log.donor | 114|
## |2906 | 0.0222222|grid_mean_cell_sd_prop | 114|
## |2907 | 0.0285714|grid_mean_cell_sd_prop | 114|
line_fun(grid_path,dataset_combination)

dataset_combination = 'v3_1m_vs_v2_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)
##
##
## | | grid| SS|
## |:--|----:|---:|
## |60 | 3.4| 445|
## |59 | 3.2| 444|
## |58 | 3.0| 442|
## |57 | 2.8| 438|
## |56 | 2.6| 437|
##
##
## | | grid|weight | neQTLs|
## |:----|----:|:-----------------------|------:|
## |2985 | 95|grid_mean_cell_sd_prop | 456|
## |2987 | 95|grid_mean_cell_sd_prop | 456|
## |3161 | 95|grid_mean_donor_sd_prop | 456|
## |3163 | 95|grid_mean_donor_sd_prop | 456|
## |217 | 10|grid_sd.donor | 455|
## |569 | 10|grid_sd_log.donor | 455|
line_fun(grid_path,dataset_combination)

WMA on subset of genes
For genes with high GC, the top three optimal WMA weights are XXX, XXX,
XXX.
path_summary <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/subset_of_genes/'
dataset_combination <- 'v2_1m_vs_v3_1m'
parameter <- 'length'

# Shared implementation of top_param_high() / top_param_low().
#
# Reads the summary of the WMA run on the gene subset with a high or low value
# of `parameter`, transposes it so that every row is one weight, and builds a
# table with the five weights that have the most BH-significant eQTLs (SS
# columns), followed by the `Sample.size.` row and by that same row's SE
# columns relabelled as "SE".
#
# Args:
#   path_summary: directory containing the summaries, with a trailing slash.
#   dataset_combination: name of the dataset pair, e.g. 'v2_1m_vs_v3_1m'.
#   parameter: gene property used for the split ('length', 'gc', 'mean', 'sd').
#   level: 'high' or 'low'.
# Returns: a knitr::kable table.
top_param_table <- function(path_summary, dataset_combination, parameter, level) {
  # File suffixes exactly as they exist on disk.
  file_suffix <- c(high = 'HIGHT', low = 'LOW')[[level]]
  summary_tab <- read.table(
    paste0(path_summary, '1_Total_summary_', dataset_combination,
           'Sample.size.', parameter, '.donor_', file_suffix, '.tsv'),
    sep = '\t', header = TRUE
  )

  # Transpose: the first row then carries the statistic names.
  by_weight <- as.data.frame(t(summary_tab))
  colnames(by_weight) <- by_weight[1, ]
  by_weight <- by_weight[-1, ]

  # t() turned every value into a formatted string, so convert before sorting;
  # sorting the strings compares them character by character, not by value.
  n_eqtls <- as.numeric(as.character(by_weight$BH_SS_eQTLs))
  by_weight <- by_weight[order(n_eqtls, decreasing = TRUE), ]

  ss_cols <- c('BH_SS_genes', 'BH_SS_eQTLs', 'cor_SS_all', 'cor_SS_sign')
  se_cols <- c('BH_SE_genes', 'BH_SE_eQTLs', 'cor_SE_all', 'cor_SE_sign')

  # head() instead of [1:5, ]: no NA padding when fewer than five weights exist.
  top_rows <- head(by_weight[, ss_cols], 5)
  is_baseline <- rownames(by_weight) == "Sample.size."
  baseline_ss <- by_weight[is_baseline, ss_cols]
  baseline_se <- by_weight[is_baseline, se_cols]
  rownames(baseline_se) <- "SE"
  colnames(baseline_se) <- ss_cols

  knitr::kable(
    rbind(top_rows, baseline_ss, baseline_se),
    caption = paste(dataset_combination, 'WMA of subset of genes with', level, parameter)
  )
}

# Table of the best weights for the gene subset with HIGH values of `parameter`.
top_param_high <- function(path_summary, dataset_combination, parameter) {
  top_param_table(path_summary, dataset_combination, parameter, level = 'high')
}

# Table of the best weights for the gene subset with LOW values of `parameter`.
top_param_low <- function(path_summary, dataset_combination, parameter) {
  top_param_table(path_summary, dataset_combination, parameter, level = 'low')
}
1M v2 & v3
top_param_high(path_summary, dataset_combination, parameter='length')
v2_1m_vs_v3_1m WMA of subset of genes with high
length
| mean_donor_L_sd_donor |
12.0000000 |
105.0000000 |
0.3830421 |
0.5124326 |
| mean_cell_L_sd_donor |
12.0000000 |
105.0000000 |
0.3827140 |
0.5114275 |
| mean_cell_L_sd_prop |
12.0000000 |
105.0000000 |
0.3827140 |
0.5114275 |
| mean_donor_L_sd_prop |
12.0000000 |
105.0000000 |
0.3830421 |
0.5124326 |
| sd_prop |
13.0000000 |
102.0000000 |
0.3849117 |
0.5409461 |
| Sample.size. |
13.0000000 |
101.0000000 |
0.3912639 |
0.5482947 |
| SE |
12.0000000 |
102.0000000 |
0.3884130 |
0.5626165 |
top_param_low(path_summary, dataset_combination, parameter='length')
v2_1m_vs_v3_1m WMA of subset of genes with low length
| mean_cell_L_sd_cell |
17.0000000 |
182.0000000 |
0.3446227 |
0.8886137 |
| mean_donor_L_sd_cell |
17.0000000 |
181.0000000 |
0.3461420 |
0.8884718 |
| sd_log.cell |
17.0000000 |
165.0000000 |
0.3336581 |
0.8836933 |
| sd.cell |
16.0000000 |
164.0000000 |
0.3303284 |
0.8789485 |
| avr_SD_donor |
16.0000000 |
161.0000000 |
0.3419979 |
0.8676182 |
| Sample.size. |
15.0000000 |
141.0000000 |
0.3636316 |
0.9043358 |
| SE |
13.0000000 |
131.0000000 |
0.3693670 |
0.9068768 |
top_param_high(path_summary, dataset_combination, parameter='gc')
v2_1m_vs_v3_1m WMA of subset of genes with high gc
| sd_prop |
9.0000000 |
40.0000000 |
0.3189122 |
0.9176581 |
| zeros_log.cell |
8.0000000 |
39.0000000 |
0.3247360 |
0.9213615 |
| Sample.size. |
7.0000000 |
37.0000000 |
0.3268219 |
0.9501444 |
| mean_donor_L_varience_donor |
7.0000000 |
35.0000000 |
0.2969538 |
0.8942915 |
| mean_cell_L_variance_donor |
7.0000000 |
35.0000000 |
0.2967622 |
0.8946662 |
| Sample.size.1 |
7.0000000 |
37.0000000 |
0.3268219 |
0.9501444 |
| SE |
3.0000000 |
5.0000000 |
0.3269741 |
0.9481789 |
top_param_low(path_summary, dataset_combination, parameter='gc')
v2_1m_vs_v3_1m WMA of subset of genes with low gc
| cv_log.cell |
48.0000000 |
1418.0000000 |
0.5107185 |
0.8048854 |
| mean_cell_L_varience_cell |
47.0000000 |
1415.0000000 |
0.5125610 |
0.8121256 |
| mean_donor_L_variance_cell |
47.0000000 |
1399.0000000 |
0.5122781 |
0.8159857 |
| cv.cell |
48.0000000 |
1360.0000000 |
0.5036711 |
0.8115379 |
| cv_log.donor |
47.0000000 |
1250.0000000 |
0.4996034 |
0.8399226 |
| Sample.size. |
49.0000000 |
1238.0000000 |
0.5167595 |
0.8462178 |
| SE |
48.0000000 |
1385.0000000 |
0.5138727 |
0.8058381 |
top_param_high(path_summary, dataset_combination, parameter='mean')
v2_1m_vs_v3_1m WMA of subset of genes with high mean
| variance.cell |
26.0000000 |
489.0000000 |
0.2588198 |
0.4702910 |
| variance_log.cell |
24.0000000 |
481.0000000 |
0.2563483 |
0.4639215 |
| mean_log.cell |
23.0000000 |
474.0000000 |
0.2598463 |
0.4724762 |
| mean_log.donor |
23.0000000 |
472.0000000 |
0.2598666 |
0.4717395 |
| mean.donor |
23.0000000 |
461.0000000 |
0.2653893 |
0.4956494 |
| Sample.size. |
21.0000000 |
415.0000000 |
0.2412562 |
0.4468668 |
| SE |
23.0000000 |
429.0000000 |
0.2338139 |
0.4278087 |
top_param_low(path_summary, dataset_combination, parameter='mean')
v2_1m_vs_v3_1m WMA of subset of genes with low mean
| cv_log.donor |
4.0000000 |
19.0000000 |
0.2364163 |
0.9426668 |
| mean_cell_L_varience_cell |
4.0000000 |
19.0000000 |
0.2682027 |
0.9398309 |
| cv_log.cell |
4.0000000 |
16.0000000 |
0.2670292 |
0.8773096 |
| Sample.size. |
3.0000000 |
15.0000000 |
0.2939085 |
0.7949185 |
| zeros_log.cell |
3.0000000 |
15.0000000 |
0.2878997 |
0.7890365 |
| Sample.size.1 |
3.0000000 |
15.0000000 |
0.2939085 |
0.7949185 |
| SE |
3.0000000 |
11.0000000 |
0.2792001 |
0.8228778 |
top_param_high(path_summary, dataset_combination, parameter='sd')
v2_1m_vs_v3_1m WMA of subset of genes with high sd
| mean_donor_L_sd_donor |
21.0000000 |
396.0000000 |
0.3469961 |
0.6797311 |
| mean_cell_L_sd_donor |
21.0000000 |
396.0000000 |
0.3468789 |
0.6798414 |
| mean_cell_L_sd_prop |
21.0000000 |
396.0000000 |
0.3468789 |
0.6798414 |
| mean_donor_L_sd_prop |
21.0000000 |
396.0000000 |
0.3469961 |
0.6797311 |
| mean_log.donor |
18.0000000 |
394.0000000 |
0.3392858 |
0.6630351 |
| Sample.size. |
19.0000000 |
367.0000000 |
0.3383879 |
0.6794109 |
| SE |
22.0000000 |
373.0000000 |
0.3315468 |
0.6615580 |
top_param_low(path_summary, dataset_combination, parameter='sd')
v2_1m_vs_v3_1m WMA of subset of genes with low sd
| Sample.size. |
0.0000000 |
0.0000000 |
0.2464812 |
0.0000000 |
| mean.donor |
0.0000000 |
0.0000000 |
0.3289821 |
0.0000000 |
| sd.donor |
0.0000000 |
0.0000000 |
0.3271387 |
0.0000000 |
| variance.donor |
0.0000000 |
0.0000000 |
0.3314561 |
0.0000000 |
| cv.donor |
0.0000000 |
0.0000000 |
0.1657964 |
0.0000000 |
| Sample.size.1 |
0.0000000 |
0.0000000 |
0.2464812 |
0.0000000 |
| SE |
0.0000000 |
0.0000000 |
0.2271203 |
0.0000000 |
NG v2 & 1M v3
dataset_combination ='ng_vs_v3_1m'
top_param_high(path_summary, dataset_combination, parameter='length')
ng_vs_v3_1m WMA of subset of genes with high length
| Sample.size. |
9.0000000 |
48.0000000 |
0.3317558 |
0.9455553 |
| zeros.cell |
9.0000000 |
48.0000000 |
0.3339620 |
0.9428702 |
| zeros_log.cell |
9.0000000 |
48.0000000 |
0.3322678 |
0.9454301 |
| sd_prop |
9.0000000 |
47.0000000 |
0.3267121 |
0.9465538 |
| mean_donor_L_variance_cell |
9.0000000 |
46.0000000 |
0.3240715 |
0.9469643 |
| Sample.size.1 |
9.0000000 |
48.0000000 |
0.3317558 |
0.9455553 |
| SE |
9.0000000 |
47.0000000 |
0.3287993 |
0.9491593 |
top_param_low(path_summary, dataset_combination, parameter='length')
ng_vs_v3_1m WMA of subset of genes with low length
| mean_log.donor |
11.0000000 |
79.0000000 |
0.2617560 |
0.8558849 |
| sd.cell |
12.0000000 |
78.0000000 |
0.2748296 |
0.8664076 |
| mean_log.cell |
11.0000000 |
77.0000000 |
0.2614956 |
0.8463045 |
| sd_log.cell |
12.0000000 |
77.0000000 |
0.2771293 |
0.8672397 |
| zeros.cell |
12.0000000 |
76.0000000 |
0.2855283 |
0.8775576 |
| Sample.size. |
10.0000000 |
47.0000000 |
0.2994051 |
0.8635707 |
| SE |
11.0000000 |
54.0000000 |
0.3064150 |
0.8584953 |
top_param_high(path_summary, dataset_combination, parameter='gc')
ng_vs_v3_1m WMA of subset of genes with high gc
| Sample.size. |
4.0000000 |
4.0000000 |
0.3413259 |
0.0000000 |
| mean.donor |
4.0000000 |
4.0000000 |
0.3107390 |
0.0000000 |
| sd.donor |
4.0000000 |
4.0000000 |
0.3205555 |
0.0000000 |
| cv.donor |
4.0000000 |
4.0000000 |
0.3308767 |
0.0000000 |
| mean_log.donor |
4.0000000 |
4.0000000 |
0.3113296 |
0.0000000 |
| Sample.size.1 |
4.0000000 |
4.0000000 |
0.3413259 |
0.0000000 |
| SE |
4.0000000 |
4.0000000 |
0.3472766 |
0.0000000 |
top_param_low(path_summary, dataset_combination, parameter='gc')
ng_vs_v3_1m WMA of subset of genes with low gc
| mean_donor_L_sd_donor |
35.0000000 |
949.0000000 |
0.4795445 |
0.7801512 |
| mean_donor_L_sd_prop |
35.0000000 |
949.0000000 |
0.4795445 |
0.7801512 |
| mean_cell_L_sd_donor |
35.0000000 |
946.0000000 |
0.4788480 |
0.7796632 |
| mean_cell_L_sd_prop |
35.0000000 |
946.0000000 |
0.4788480 |
0.7796632 |
| zeros_log.cell |
34.0000000 |
945.0000000 |
0.4729856 |
0.7750755 |
| Sample.size. |
33.0000000 |
944.0000000 |
0.4735239 |
0.7756094 |
| SE |
38.0000000 |
967.0000000 |
0.4708052 |
0.7702254 |
top_param_high(path_summary, dataset_combination, parameter='mean')
ng_vs_v3_1m WMA of subset of genes with high mean
| inv_variance.cell |
20.0000000 |
564.0000000 |
0.1882468 |
0.5218753 |
| inv_sd.donor |
20.0000000 |
525.0000000 |
0.1932009 |
0.5618353 |
| mean_donor_L_varience_donor |
19.0000000 |
519.0000000 |
0.1953610 |
0.5596007 |
| mean_cell_L_variance_donor |
18.0000000 |
505.0000000 |
0.1956957 |
0.5670303 |
| cv.cell |
19.0000000 |
490.0000000 |
0.1920465 |
0.5370476 |
| Sample.size. |
16.0000000 |
470.0000000 |
0.2157812 |
0.5490202 |
| SE |
18.0000000 |
472.0000000 |
0.2196725 |
0.5730657 |
top_param_low(path_summary, dataset_combination, parameter='mean')
ng_vs_v3_1m WMA of subset of genes with low mean
| mean.donor |
3.0000000 |
12.0000000 |
0.3216376 |
0.9309261 |
| variance.donor |
3.0000000 |
12.0000000 |
0.3284211 |
0.9298793 |
| mean_log.donor |
3.0000000 |
12.0000000 |
0.3216493 |
0.9309324 |
| variance_log.donor |
3.0000000 |
12.0000000 |
0.3284211 |
0.9298793 |
| variance.cell |
3.0000000 |
12.0000000 |
0.3151593 |
0.9303253 |
| Sample.size. |
2.0000000 |
6.0000000 |
0.3416197 |
0.9999045 |
| SE |
1.0000000 |
1.0000000 |
0.3438839 |
0.0000000 |
top_param_high(path_summary, dataset_combination, parameter='sd')
ng_vs_v3_1m WMA of subset of genes with high sd
| cv_log.cell |
24.0000000 |
684.0000000 |
0.3470991 |
0.7124770 |
| cv.cell |
26.0000000 |
678.0000000 |
0.3398562 |
0.7139733 |
| inv_sd.cell |
25.0000000 |
673.0000000 |
0.3427132 |
0.7215641 |
| Sample.size. |
25.0000000 |
672.0000000 |
0.3640568 |
0.7235983 |
| mean_donor_L_varience_donor |
26.0000000 |
666.0000000 |
0.3325521 |
0.7317342 |
| Sample.size.1 |
25.0000000 |
672.0000000 |
0.3640568 |
0.7235983 |
| SE |
23.0000000 |
659.0000000 |
0.3659004 |
0.7381223 |
top_param_low(path_summary, dataset_combination, parameter='sd')
ng_vs_v3_1m WMA of subset of genes with low sd
| mean.donor |
3.0000000 |
12.0000000 |
0.3201567 |
0.9309261 |
| variance.donor |
3.0000000 |
12.0000000 |
0.3233939 |
0.9298793 |
| mean_log.donor |
3.0000000 |
12.0000000 |
0.3201662 |
0.9309324 |
| variance_log.donor |
3.0000000 |
12.0000000 |
0.3233939 |
0.9298793 |
| variance.cell |
3.0000000 |
12.0000000 |
0.3158997 |
0.9303253 |
| Sample.size. |
2.0000000 |
6.0000000 |
0.3263891 |
0.9999045 |
| SE |
1.0000000 |
1.0000000 |
0.3302035 |
0.0000000 |
#parameters <- c('gc','length','mean','sd')
NG v2 & STEMI v2
dataset_combination ='ng_vs_stemi_v2'
top_param_high(path_summary, dataset_combination, parameter='length')
ng_vs_stemi_v2 WMA of subset of genes with high length
| variance.donor |
5.0000000 |
26.0000000 |
0.2432222 |
0.7875336 |
| variance_log.donor |
5.0000000 |
26.0000000 |
0.2432313 |
0.7875200 |
| zeros.cell |
5.0000000 |
26.0000000 |
0.2589024 |
0.8261392 |
| cv_log.cell |
5.0000000 |
26.0000000 |
0.2630279 |
0.9357856 |
| mean_cell_L_varience_cell |
5.0000000 |
26.0000000 |
0.2501131 |
0.8327964 |
| Sample.size. |
4.0000000 |
25.0000000 |
0.2625132 |
0.9304219 |
| SE |
5.0000000 |
26.0000000 |
0.2597212 |
0.8299394 |
top_param_low(path_summary, dataset_combination, parameter='length')
ng_vs_stemi_v2 WMA of subset of genes with low length
| mean_donor_L_varience_donor |
3.0000000 |
8.0000000 |
0.2934226 |
0.9656829 |
| mean_donor_L_sd_donor |
3.0000000 |
8.0000000 |
0.3109583 |
0.9671971 |
| mean_donor_L_sd_prop |
3.0000000 |
8.0000000 |
0.3109583 |
0.9671971 |
| cv.cell |
2.0000000 |
6.0000000 |
0.3258072 |
0.9950748 |
| cv_log.cell |
2.0000000 |
6.0000000 |
0.3271891 |
0.9951192 |
| Sample.size. |
1.0000000 |
2.0000000 |
0.3230321 |
0.0000000 |
| SE |
2.0000000 |
3.0000000 |
0.3015067 |
0.0000000 |
top_param_high(path_summary, dataset_combination, parameter='gc')
ng_vs_stemi_v2 WMA of subset of genes with high gc
| cv.cell |
1.0000000 |
1.0000000 |
0.2527622 |
0.0000000 |
| zeros.cell |
1.0000000 |
1.0000000 |
0.2685889 |
0.0000000 |
| zeros_log.cell |
1.0000000 |
1.0000000 |
0.2703194 |
0.0000000 |
| inv_variance.cell |
1.0000000 |
1.0000000 |
0.2362931 |
0.0000000 |
| inv_variance.donor |
1.0000000 |
1.0000000 |
0.2219016 |
0.0000000 |
| Sample.size. |
0.0000000 |
0.0000000 |
0.2702047 |
0.0000000 |
| SE |
1.0000000 |
1.0000000 |
0.2627731 |
0.0000000 |
top_param_low(path_summary, dataset_combination, parameter='gc')
ng_vs_stemi_v2 WMA of subset of genes with low gc
| inv_sd.donor |
9.0000000 |
373.0000000 |
0.3511033 |
0.9427655 |
| cv.cell |
9.0000000 |
370.0000000 |
0.3426099 |
0.9429228 |
| cv_log.donor |
11.0000000 |
363.0000000 |
0.3321501 |
0.9400661 |
| sd.cell |
8.0000000 |
348.0000000 |
0.3302741 |
0.9543884 |
| sd_log.cell |
8.0000000 |
348.0000000 |
0.3323872 |
0.9538529 |
| Sample.size. |
8.0000000 |
315.0000000 |
0.3535123 |
0.9504660 |
| SE |
2.0000000 |
21.0000000 |
0.3570769 |
0.8130366 |
top_param_high(path_summary, dataset_combination, parameter='mean')
ng_vs_stemi_v2 WMA of subset of genes with high mean
| sd_prop |
26.0000000 |
566.0000000 |
0.1551807 |
0.4704803 |
| inv_sd.donor |
24.0000000 |
565.0000000 |
0.1546213 |
0.4687628 |
| mean_cell_L_varience_cell |
27.0000000 |
561.0000000 |
0.1615674 |
0.4762640 |
| inv_sd.cell |
25.0000000 |
561.0000000 |
0.1612236 |
0.4760984 |
| avr_SD_donor |
23.0000000 |
558.0000000 |
0.1525568 |
0.4704280 |
| Sample.size. |
22.0000000 |
472.0000000 |
0.1640899 |
0.5071582 |
| SE |
29.0000000 |
577.0000000 |
0.1601348 |
0.4726863 |
top_param_low(path_summary, dataset_combination, parameter='mean')
ng_vs_stemi_v2 WMA of subset of genes with low mean
| variance.cell |
0.000000e+00 |
0.000000e+00 |
9.385707e-02 |
0.000000e+00 |
| variance_log.cell |
0.000000e+00 |
0.000000e+00 |
9.387245e-02 |
0.000000e+00 |
| mean_cell_L_sd_donor |
1.0000000 |
3.0000000 |
0.1221018 |
0.0000000 |
| mean_cell_L_sd_prop |
1.0000000 |
3.0000000 |
0.1221018 |
0.0000000 |
| Sample.size. |
0.0000000 |
0.0000000 |
0.1566476 |
0.0000000 |
| Sample.size.1 |
0.0000000 |
0.0000000 |
0.1566476 |
0.0000000 |
| SE |
0.0000000 |
0.0000000 |
0.1696128 |
0.0000000 |
top_param_high(path_summary, dataset_combination, parameter='sd')
ng_vs_stemi_v2 WMA of subset of genes with high sd
| mean_donor_L_sd_donor |
26.0000000 |
589.0000000 |
0.2857967 |
0.7401483 |
| mean_donor_L_sd_prop |
26.0000000 |
589.0000000 |
0.2857967 |
0.7401483 |
| inv_sd.donor |
24.0000000 |
588.0000000 |
0.2889890 |
0.7478492 |
| inv_variance.donor |
24.0000000 |
586.0000000 |
0.2826482 |
0.7318523 |
| mean_donor_L_varience_donor |
25.0000000 |
585.0000000 |
0.2770441 |
0.7296780 |
| Sample.size. |
21.0000000 |
492.0000000 |
0.3036785 |
0.7773670 |
| SE |
27.0000000 |
581.0000000 |
0.2943974 |
0.7225017 |
top_param_low(path_summary, dataset_combination, parameter='sd')
ng_vs_stemi_v2 WMA of subset of genes with low sd
| mean_cell_L_sd_donor |
1.0000000 |
3.0000000 |
0.1298505 |
0.0000000 |
| mean_cell_L_sd_prop |
1.0000000 |
3.0000000 |
0.1298505 |
0.0000000 |
| Sample.size. |
0.0000000 |
0.0000000 |
0.1505765 |
0.0000000 |
| mean.donor |
0.0000000 |
0.0000000 |
0.1595669 |
0.0000000 |
| sd.donor |
0.0000000 |
0.0000000 |
0.1633831 |
0.0000000 |
| Sample.size.1 |
0.0000000 |
0.0000000 |
0.1505765 |
0.0000000 |
| SE |
0.0000000 |
0.0000000 |
0.1610945 |
0.0000000 |
NG v2 & 1M v2
dataset_combination ='ng_vs_v2_1m'
top_param_high(path_summary, dataset_combination, parameter='length')
ng_vs_v2_1m WMA of subset of genes with high length
| variance.cell |
1.000000e+01 |
9.800000e+01 |
3.189260e-01 |
6.691013e-01 |
| variance_log.cell |
1.000000e+01 |
9.800000e+01 |
3.191818e-01 |
6.689081e-01 |
| mean_cell_L_variance_donor |
11.0000000 |
120.0000000 |
0.3419234 |
0.6858609 |
| sd_log.cell |
11.00000000 |
119.00000000 |
0.32801015 |
0.66921562 |
| mean_donor_L_sd_donor |
11.00000000 |
119.00000000 |
0.33417925 |
0.67343220 |
| Sample.size. |
9.00000000 |
115.00000000 |
0.33444227 |
0.67846350 |
| SE |
13.00000000 |
93.00000000 |
0.34774581 |
0.40433683 |
top_param_low(path_summary, dataset_combination, parameter='length')
ng_vs_v2_1m WMA of subset of genes with low length
| mean.donor |
8.0000000 |
33.0000000 |
0.3498154 |
0.9096587 |
| sd.donor |
8.0000000 |
33.0000000 |
0.3437112 |
0.9094820 |
| variance.donor |
8.0000000 |
33.0000000 |
0.3156178 |
0.9196858 |
| mean_log.donor |
8.0000000 |
33.0000000 |
0.3501954 |
0.9095520 |
| sd_log.donor |
8.0000000 |
33.0000000 |
0.3438656 |
0.9094445 |
| Sample.size. |
6.0000000 |
15.0000000 |
0.3725321 |
0.9362740 |
| SE |
7.0000000 |
36.0000000 |
0.3832980 |
0.9399126 |
top_param_high(path_summary, dataset_combination, parameter='gc')
ng_vs_v2_1m WMA of subset of genes with high gc
| Sample.size. |
1.0000000 |
1.0000000 |
0.2958102 |
0.0000000 |
| mean.donor |
1.0000000 |
1.0000000 |
0.2929579 |
0.0000000 |
| sd.donor |
1.0000000 |
1.0000000 |
0.2881051 |
0.0000000 |
| variance.donor |
1.0000000 |
1.0000000 |
0.2715610 |
0.0000000 |
| cv.donor |
1.0000000 |
1.0000000 |
0.2884989 |
0.0000000 |
| Sample.size.1 |
1.0000000 |
1.0000000 |
0.2958102 |
0.0000000 |
| SE |
1.0000000 |
1.0000000 |
0.2870843 |
0.0000000 |
top_param_low(path_summary, dataset_combination, parameter='gc')
ng_vs_v2_1m WMA of subset of genes with low gc
| inv_variance.donor |
28.0000000 |
632.0000000 |
0.4560842 |
0.8463953 |
| inv_variance.cell |
27.00000000 |
591.00000000 |
0.46198021 |
0.90272872 |
| inv_sd.donor |
29.00000000 |
589.00000000 |
0.46309785 |
0.86479291 |
| cv.cell |
28.00000000 |
586.00000000 |
0.46548434 |
0.86423005 |
| inv_sd.cell |
28.00000000 |
586.00000000 |
0.46435219 |
0.86425753 |
| Sample.size. |
22.0000000 |
521.0000000 |
0.4627987 |
0.8568584 |
| SE |
27.0000000 |
611.0000000 |
0.4701973 |
0.8768237 |
top_param_high(path_summary, dataset_combination, parameter='mean')
ng_vs_v2_1m WMA of subset of genes with high mean
| cv.donor |
25.0000000 |
472.0000000 |
0.1670159 |
0.4316799 |
| cv_log.donor |
25.0000000 |
472.0000000 |
0.1685668 |
0.4325850 |
| cv.cell |
24.0000000 |
472.0000000 |
0.1732926 |
0.4391895 |
| sd_prop_L_mean_cell |
25.0000000 |
472.0000000 |
0.1671250 |
0.4314487 |
| inv_variance.cell |
25.0000000 |
471.0000000 |
0.1753585 |
0.4618821 |
| Sample.size. |
21.0000000 |
457.0000000 |
0.1792590 |
0.4667053 |
| SE |
24.0000000 |
518.0000000 |
0.1870202 |
0.4226345 |
top_param_low(path_summary, dataset_combination, parameter='mean')
ng_vs_v2_1m WMA of subset of genes with low mean
| Sample.size. |
1.0000000 |
2.0000000 |
0.2993830 |
0.0000000 |
| mean.donor |
1.0000000 |
2.0000000 |
0.2958014 |
0.0000000 |
| sd.donor |
1.0000000 |
2.0000000 |
0.2963371 |
0.0000000 |
| variance.donor |
1.0000000 |
2.0000000 |
0.2887124 |
0.0000000 |
| mean_log.donor |
1.0000000 |
2.0000000 |
0.2958042 |
0.0000000 |
| Sample.size.1 |
1.0000000 |
2.0000000 |
0.2993830 |
0.0000000 |
| SE |
1.0000000 |
2.0000000 |
0.3167615 |
0.0000000 |
top_param_high(path_summary, dataset_combination, parameter='sd')
ng_vs_v2_1m WMA of subset of genes with high sd
| sd_prop_L_mean_cell |
25.0000000 |
459.0000000 |
0.3381884 |
0.6322244 |
| cv_log.donor |
25.0000000 |
456.0000000 |
0.3397424 |
0.6358152 |
| cv.donor |
25.0000000 |
455.0000000 |
0.3381732 |
0.6379984 |
| inv_variance.cell |
24.0000000 |
450.0000000 |
0.3464770 |
0.6608746 |
| inv_variance.donor |
26.0000000 |
450.0000000 |
0.3475537 |
0.6586948 |
| Sample.size. |
22.0000000 |
443.0000000 |
0.3493605 |
0.6513082 |
| SE |
26.0000000 |
509.0000000 |
0.3638315 |
0.6446366 |
top_param_low(path_summary, dataset_combination, parameter='sd')
ng_vs_v2_1m WMA of subset of genes with low sd
| Sample.size. |
1.0000000 |
2.0000000 |
0.2834610 |
0.0000000 |
| mean.donor |
1.0000000 |
2.0000000 |
0.2855699 |
0.0000000 |
| sd.donor |
1.0000000 |
2.0000000 |
0.2843085 |
0.0000000 |
| variance.donor |
1.0000000 |
2.0000000 |
0.2785106 |
0.0000000 |
| mean_log.donor |
1.0000000 |
2.0000000 |
0.2855714 |
0.0000000 |
| Sample.size.1 |
1.0000000 |
2.0000000 |
0.2834610 |
0.0000000 |
| SE |
1.0000000 |
2.0000000 |
0.3063411 |
0.0000000 |
STEMI v3 & 1M v3
dataset_combination ='stemi_v3_vs_v3_1m'
top_param_high(path_summary, dataset_combination, parameter='length')
stemi_v3_vs_v3_1m WMA of subset of genes with high length
| mean_donor_L_variance_cell |
5.0000000 |
26.0000000 |
0.3166268 |
0.6195197 |
| mean_donor_L_sd_cell |
5.0000000 |
26.0000000 |
0.3087805 |
0.7052360 |
| zeros_log.cell |
4.0000000 |
25.0000000 |
0.2868697 |
0.3418463 |
| avr_SD_donor |
4.0000000 |
25.0000000 |
0.2789740 |
0.3607516 |
| mean_cell_L_varience_cell |
4.0000000 |
25.0000000 |
0.2995581 |
0.3817209 |
| Sample.size. |
4.0000000 |
23.0000000 |
0.2805184 |
0.3170239 |
| SE |
2.0000000 |
12.0000000 |
0.3168434 |
-0.3777836 |
top_param_low(path_summary, dataset_combination, parameter='length')
stemi_v3_vs_v3_1m WMA of subset of genes with low length
| zeros.cell |
8.0000000 |
85.0000000 |
0.2474593 |
0.7900599 |
| variance.donor |
6.0000000 |
55.0000000 |
0.2689472 |
0.8955783 |
| variance_log.donor |
6.0000000 |
55.0000000 |
0.2689796 |
0.8955982 |
| sd.donor |
6.0000000 |
52.0000000 |
0.2643972 |
0.8907365 |
| sd_log.donor |
6.0000000 |
46.0000000 |
0.2643250 |
0.8825832 |
| Sample.size. |
1.0000000 |
5.0000000 |
0.2403568 |
0.7811554 |
| SE |
5.0000000 |
49.0000000 |
0.2304147 |
0.9725329 |
top_param_high(path_summary, dataset_combination, parameter='gc')
stemi_v3_vs_v3_1m WMA of subset of genes with high gc
| mean_donor_sd_prop |
6.0000000 |
44.0000000 |
0.2653018 |
0.9232801 |
| Sample.size. |
4.0000000 |
41.0000000 |
0.2154790 |
0.9158282 |
| mean.cell |
4.0000000 |
41.0000000 |
0.2154350 |
0.9214766 |
| sd.cell |
4.0000000 |
41.0000000 |
0.2080846 |
0.9150221 |
| variance.cell |
4.0000000 |
41.0000000 |
0.2004603 |
0.9141595 |
| Sample.size.1 |
4.0000000 |
41.0000000 |
0.2154790 |
0.9158282 |
| SE |
0.0000000 |
0.0000000 |
0.2729563 |
0.0000000 |
top_param_low(path_summary, dataset_combination, parameter='gc')
stemi_v3_vs_v3_1m WMA of subset of genes with low gc
| sd_prop_L_mean_cell |
25.0000000 |
795.0000000 |
0.4212361 |
0.8525919 |
| inv_sd.cell |
24.0000000 |
776.0000000 |
0.4453924 |
0.8451554 |
| inv_variance.cell |
24.0000000 |
773.0000000 |
0.4460672 |
0.8399818 |
| mean_cell_L_varience_cell |
25.0000000 |
735.0000000 |
0.4555651 |
0.8645287 |
| mean_donor_L_variance_cell |
25.0000000 |
721.0000000 |
0.4759422 |
0.8568491 |
| Sample.size. |
12.0000000 |
166.0000000 |
0.4419067 |
0.8739260 |
| SE |
24.0000000 |
734.0000000 |
0.4678853 |
0.8054725 |
top_param_high(path_summary, dataset_combination, parameter='mean')
stemi_v3_vs_v3_1m WMA of subset of genes with high mean
| variance.donor |
2.000000e+00 |
3.000000e+01 |
2.247827e-01 |
9.809085e-02 |
| zeros.cell |
8.0000000 |
142.0000000 |
0.2767575 |
0.5049588 |
| mean_donor_L_sd_donor |
8.0000000 |
131.0000000 |
0.2909270 |
0.4776824 |
| mean_donor_L_sd_prop |
8.0000000 |
131.0000000 |
0.2909270 |
0.4776824 |
| mean_donor_L_variance_cell |
8.0000000 |
129.0000000 |
0.2784009 |
0.4515029 |
| Sample.size. |
1.0000000 |
29.0000000 |
0.2698677 |
-0.3928951 |
| SE |
5.0000000 |
101.0000000 |
0.2494954 |
0.3133391 |
top_param_low(path_summary, dataset_combination, parameter='mean')
stemi_v3_vs_v3_1m WMA of subset of genes with low mean
| mean_donor_sd_prop |
1.0000000 |
9.0000000 |
0.2772362 |
0.6512968 |
| mean.donor |
1.0000000 |
4.0000000 |
0.2671629 |
0.0000000 |
| sd.donor |
1.0000000 |
4.0000000 |
0.2611781 |
0.0000000 |
| variance.donor |
1.0000000 |
4.0000000 |
0.2785881 |
0.0000000 |
| mean_log.donor |
1.0000000 |
4.0000000 |
0.2671462 |
0.0000000 |
| Sample.size. |
0.0000000 |
0.0000000 |
0.2148669 |
0.0000000 |
| SE |
1.0000000 |
9.0000000 |
0.2422744 |
0.6512968 |
top_param_high(path_summary, dataset_combination, parameter='sd')
stemi_v3_vs_v3_1m WMA of subset of genes with high sd
| zeros.cell |
8.0000000 |
88.0000000 |
0.3626497 |
0.9362700 |
| inv_variance.cell |
7.0000000 |
79.0000000 |
0.3426494 |
0.9268460 |
| mean_donor_L_variance_cell |
8.0000000 |
78.0000000 |
0.3656688 |
0.9437620 |
| mean_donor_L_sd_cell |
7.0000000 |
74.0000000 |
0.3643666 |
0.9248328 |
| mean_donor_L_varience_donor |
6.0000000 |
72.0000000 |
0.3884618 |
0.9312092 |
| Sample.size. |
1.0000000 |
10.0000000 |
0.3403024 |
0.9992667 |
| SE |
7.0000000 |
82.0000000 |
0.3436007 |
0.9381732 |
top_param_low(path_summary, dataset_combination, parameter='sd')
stemi_v3_vs_v3_1m WMA of subset of genes with low sd
| Sample.size. |
0.0000000 |
0.0000000 |
0.1644582 |
0.0000000 |
| mean.donor |
0.0000000 |
0.0000000 |
0.1866382 |
0.0000000 |
| sd.donor |
0.0000000 |
0.0000000 |
0.1918760 |
0.0000000 |
| variance.donor |
0.0000000 |
0.0000000 |
0.1953173 |
0.0000000 |
| cv.donor |
0.0000000 |
0.0000000 |
0.1573711 |
0.0000000 |
| Sample.size.1 |
0.0000000 |
0.0000000 |
0.1644582 |
0.0000000 |
| SE |
0.0000000 |
0.0000000 |
0.1309707 |
0.0000000 |