Analyzing each dataset
WMA efficiency estimate 5ds
- Testing if the differences between summary statistics are significant
Weights and weighting characteristics
WMA estimate for pairwise weighting
- Within chemistries
- Between chemistries
Grid search
- functions
- results
WMA on subset of genes

Analyzing each dataset

library(DT)
library(UpSetR)

path_with_WMA <- '/Users/korshe/Documents/Data_Groningen/pbmc_5ds/'
dataset_ids <- c('stemi_v2','stemi_v3','v2_1m','v3_1m', 'ng')

# Load the combined eQTL table and add BH- and Bonferroni-adjusted p-values
# for every dataset as columns `BH_<dataset>` and `Bonf_<dataset>`.
tab_eqtls <- read.table(paste0(path_with_WMA, 'Sample.size._tab.tsv'), sep = '\t', header = TRUE)
tab <- tab_eqtls
for (dataset_id in dataset_ids) {
  raw_p_col <- paste0('p_value_', dataset_id)
  # mt.rawp2adjp() returns its `adjp` matrix sorted by increasing raw p-value,
  # so the table is sorted the same way first to keep the rows aligned.
  # Side effect: `tab` ends up ordered by the p-values of the last dataset.
  tab <- tab[order(tab[, raw_p_col], decreasing = FALSE), ]
  adjusted <- multtest::mt.rawp2adjp(rawp = tab[, raw_p_col],
                                     proc = c("BH", "Bonferroni"))$adjp
  # Column 1 of `adjp` is the raw p-value; 2 and 3 follow the order of `proc`.
  tab[, paste0('BH_', dataset_id)] <- adjusted[, 2]
  tab[, paste0('Bonf_', dataset_id)] <- adjusted[, 3]
}


# Draw an UpSet plot of the identifiers that are significant in each dataset.
#
# tab        data frame holding the adjusted p-value columns and `id_col`
# adj_prefix prefix of the adjusted p-value columns ('BH_' or 'Bonf_')
# id_col     column with the identifiers to intersect ('snp_gene', 'feature_id')
# x_label    label shown on the set-size axis
# datasets   named vector: plot label -> dataset suffix of the p-value column;
#            WMA results can be added here, e.g. c(WMA_SS = 'ZW_ss')
# alpha      significance threshold applied to the adjusted p-values
upset_significant <- function(tab, adj_prefix, id_col, x_label,
                              datasets = c(NG = 'ng',
                                           STEMI_v2 = 'stemi_v2',
                                           STEMI_v3 = 'stemi_v3',
                                           V2_1M = 'v2_1m',
                                           V3_1M = 'v3_1m'),
                              alpha = 0.05) {
  significant_ids <- lapply(datasets, function(dataset_id) {
    adj_p <- tab[[paste0(adj_prefix, dataset_id)]]
    # which() drops NA comparisons; plain logical indexing would return NA rows
    # and `NA` would then be counted as an element shared between the sets.
    unique(as.character(tab[[id_col]][which(adj_p < alpha)]))
  })
  UpSetR::upset(UpSetR::fromList(significant_ids),
                set_size.show = TRUE, order.by = 'freq', nintersects = 60,
                nsets = 20, sets.x.label = x_label)
}

upset_significant(tab, 'BH_', 'snp_gene', 'BH eQTLs')

upset_significant(tab, 'BH_', 'feature_id', 'BH eGenes')

upset_significant(tab, 'Bonf_', 'snp_gene', 'Bonf eQTLs')

upset_significant(tab, 'Bonf_', 'feature_id', 'Bonf eGenes')

## Expression parameters

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(tidyr)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

param = 'mean.donor'
summary_stat_filteres <- tab
# Plot how one expression parameter is distributed across the five datasets.
#
# A random subset of rows is drawn, sorted by the van_der_Wijst_v2 value and
# shown as (left) per-dataset density curves and (right) per-dataset line
# plots over the sorted rows.
#
# summary_stat_filteres  data frame with a `snp_gene` column and one column
#                        `<param>_<dataset>` per dataset
# param                  parameter prefix, e.g. 'mean.donor' or 'sd_prop'
# n_sample               maximum number of rows to draw at random
# density_limits         x-axis limits of the density plot; values outside are
#                        dropped by ggplot2 with a warning (NULL = no limits)
#
# Prints `param`, draws the two panels with grid.arrange() and returns its value.
get_plot_for_values <- function(summary_stat_filteres, param,
                                n_sample = 1500, density_limits = c(0, 6)) {
  # Dataset suffix -> label used in the plots; the order fixes the legend and
  # facet order.
  dataset_labels <- c(stemi_v2 = 'van_Blokland_v2',
                      stemi_v3 = 'van_Blokland_v3',
                      v2_1m = 'Oelen_v2',
                      v3_1m = 'Oelen_v3',
                      ng = 'van_der_Wijst_v2')
  param_cols <- paste0(param, '_', names(dataset_labels))

  missing_cols <- setdiff(c(param_cols, 'snp_gene'),
                          colnames(summary_stat_filteres))
  if (length(missing_cols) > 0) {
    stop('Missing column(s): ', paste(missing_cols, collapse = ', '),
         call. = FALSE)
  }

  # Rename by position instead of regex-stripping `param` from the names:
  # `param` contains '.', which gsub() would treat as a wildcard.
  values <- as.data.frame(summary_stat_filteres)[, c(param_cols, 'snp_gene')]
  colnames(values) <- c(unname(dataset_labels), 'snp_gene')

  # Never ask for more rows than exist (sample() errors otherwise).
  values <- values[sample.int(nrow(values), min(n_sample, nrow(values))), ]
  values <- values[order(as.numeric(values$van_der_Wijst_v2)), ]
  values$gene <- seq_len(nrow(values))

  # Long format: one row per (row index, dataset).
  labels <- unname(dataset_labels)
  data_long <- data.frame(
    snp_gene = rep(values$snp_gene, times = length(labels)),
    gene = rep(values$gene, times = length(labels)),
    condition = factor(rep(labels, each = nrow(values)), levels = labels),
    measurement = as.numeric(unlist(values[labels], use.names = FALSE))
  )
  print(param)

  lines_by_dataset <- ggplot(data = data_long,
                             mapping = aes(x = gene, y = measurement, color = condition)) +
    theme_light() +
    geom_line() +
    facet_grid(rows = vars(condition)) +
    ggtitle(param)

  density_by_dataset <- ggplot(data_long, aes(x = measurement, color = condition)) +
    theme_light() +
    geom_density(alpha = .3) +
    scale_x_continuous(name = param, limits = density_limits)

  grid.arrange(density_by_dataset, lines_by_dataset, ncol = 2)
}

get_plot_for_values(summary_stat_filteres,param='mean.donor')

## [1] "mean.donor"

get_plot_for_values(summary_stat_filteres,param='mean.cell')

## [1] "mean.cell"

get_plot_for_values(summary_stat_filteres,param='sd.donor')

## [1] "sd.donor"

get_plot_for_values(summary_stat_filteres,param='sd.cell')

## [1] "sd.cell"

get_plot_for_values(summary_stat_filteres,param='sd_prop')

## [1] "sd_prop"

## Warning: Removed 37 rows containing non-finite values (stat_density).

WMA efficiency estimate 5ds

# Identifiers of the candidate weighting schemes; printed below for reference.
weights <- read.table(
  paste0(path_with_WMA, 'list_of_weighting_ids.tsv'),
  sep = '\t',
  header = TRUE
)
weights$x

##  [1] "mean.donor"                  "sd.donor"                   
##  [3] "variance.donor"              "cv.donor"                   
##  [5] "mean_log.donor"              "sd_log.donor"               
##  [7] "variance_log.donor"          "cv_log.donor"               
##  [9] "mean.cell"                   "sd.cell"                    
## [11] "variance.cell"               "cv.cell"                    
## [13] "zeros.cell"                  "mean_log.cell"              
## [15] "sd_log.cell"                 "variance_log.cell"          
## [17] "cv_log.cell"                 "zeros_log.cell"             
## [19] "avr_SD_donor"                "inv_variance.cell"          
## [21] "inv_variance.donor"          "mean_donor_L_varience_donor"
## [23] "mean_cell_L_varience_cell"   "mean_donor_L_variance_cell" 
## [25] "mean_cell_L_variance_donor"  "inv_sd.cell"                
## [27] "inv_sd.donor"                "mean_donor_L_sd_donor"      
## [29] "mean_cell_L_sd_cell"         "mean_donor_L_sd_cell"       
## [31] "mean_cell_L_sd_donor"        "sd_prop"                    
## [33] "mean_cell_sd_prop"           "mean_cell_L_sd_prop"        
## [35] "mean_donor_sd_prop"          "mean_donor_L_sd_prop"       
## [37] "sd_prop_L_mean_cell"

# Dataset specification table, shown as an interactive, exportable widget.
dataset_characteristics <- read.table(
  paste0(path_with_WMA, 'PBMC_dataset_specifications.csv'),
  sep = ',',
  header = TRUE
)
datatable(
  dataset_characteristics,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)

Testing if the differences between summary statistics are significant

t.test(tab$OverallZScore_stemi_v3,tab$OverallZScore_stemi_v2,alternative="two.sided", conf.level=0.95)

## 
##  Welch Two Sample t-test
## 
## data:  tab$OverallZScore_stemi_v3 and tab$OverallZScore_stemi_v2
## t = 6.4791, df = 468306, p-value = 9.235e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01462705 0.02731466
## sample estimates:
##    mean of x    mean of y 
##  0.019233233 -0.001737625

t.test(tab$OverallZScore_v2_1m,tab$OverallZScore_v3_1m,alternative="two.sided", conf.level=0.95)

## 
##  Welch Two Sample t-test
## 
## data:  tab$OverallZScore_v2_1m and tab$OverallZScore_v3_1m
## t = -0.026983, df = 468730, p-value = 0.9785
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006931284  0.006743032
## sample estimates:
##    mean of x    mean of y 
## -0.007809981 -0.007715855

# `tab$ng` is not a column (it evaluates to NULL), so this call silently ran a
# one-sample test against 0, as the rendered output below shows. Compare with
# the NG Z-scores instead.
# NOTE(review): assumes the column follows the `OverallZScore_<dataset>` naming
# used by the other datasets - confirm; re-knit to refresh the output below.
t.test(tab$OverallZScore_v2_1m, tab$OverallZScore_ng, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  tab$OverallZScore_v2_1m
## t = -3.1317, df = 234478, p-value = 0.001738
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.012697905 -0.002922057
## sample estimates:
##    mean of x 
## -0.007809981

# `tab$ng` is not a column (it evaluates to NULL), so this call silently ran a
# one-sample test against 0, as the rendered output below shows. Compare with
# the NG Z-scores instead.
# NOTE(review): assumes the column follows the `OverallZScore_<dataset>` naming
# used by the other datasets - confirm; re-knit to refresh the output below.
t.test(tab$OverallZScore_v3_1m, tab$OverallZScore_ng, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  tab$OverallZScore_v3_1m
## t = -3.1633, df = 234483, p-value = 0.00156
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.012496563 -0.002935147
## sample estimates:
##    mean of x 
## -0.007715855

t.test(tab$OverallZScore_v2_1m,tab$OverallZScore_stemi_v2,alternative="two.sided", conf.level=0.95)

## 
##  Welch Two Sample t-test
## 
## data:  tab$OverallZScore_v2_1m and tab$OverallZScore_stemi_v2
## t = -1.8087, df = 463958, p-value = 0.07049
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0126524541  0.0005077419
## sample estimates:
##    mean of x    mean of y 
## -0.007809981 -0.001737625

t.test(tab$OverallZScore_v3_1m,tab$OverallZScore_stemi_v3,alternative="two.sided", conf.level=0.95)

## 
##  Welch Two Sample t-test
## 
## data:  tab$OverallZScore_v3_1m and tab$OverallZScore_stemi_v3
## t = -7.9908, df = 467934, p-value = 1.344e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03355915 -0.02033903
## sample estimates:
##    mean of x    mean of y 
## -0.007715855  0.019233233

Weights and weighting characteristics

Check which weighting is optimal, based on the number of BH-corrected eQTLs

library(DT)
path_with_WMA <- '/Users/korshe/Documents/Data_Groningen/pbmc_5ds/'

# Per-weighting summary after BH correction, sorted by increasing eQTL count.
BH <- read.table(paste0(path_with_WMA, 'BH', '.tsv'), sep = '\t', header = TRUE)
BH <- BH[order(BH$eQTLs), ]
# Move columns 10 and 11 to the front of the displayed table.
BH <- BH[, c(10, 11, 1:9)]
DT::datatable(
  BH,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)

Check which weighting is optimal, based on the number of Bonferroni-corrected eQTLs

# Per-weighting summary after Bonferroni correction, sorted by increasing eQTL count.
Bonf <- read.table(paste0(path_with_WMA, 'Bonf', '.tsv'), sep = '\t', header = TRUE)
Bonf <- Bonf[order(Bonf$eQTLs), ]
# Move columns 10 and 11 to the front of the displayed table.
Bonf <- Bonf[, c(10, 11, 1:9)]
DT::datatable(
  Bonf,
  extensions = "Buttons",
  options = list(
    paging = TRUE,
    scrollX = TRUE,
    searching = TRUE,
    ordering = TRUE,
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf')
  )
)

library(viridis)

## Loading required package: viridisLite

tab <- tab[complete.cases(tab),]
# Local 2-D kernel density at each (x, y) point.
#
# x, y  numeric vectors of equal length
# ...   forwarded to MASS::kde2d() (e.g. `n`, the grid size per axis)
#
# Returns one density value per input point, read from the kde2d() grid cell
# whose lower edges are found with findInterval().
get_density <- function(x, y, ...) {
  kde <- MASS::kde2d(x, y, ...)
  grid_cell <- cbind(findInterval(x, kde$x), findInterval(y, kde$y))
  kde$z[grid_cell]
}

# Point density of the sample-size-weighted Z-scores against the eQTLGen Z-scores.
tab$density <- get_density(tab$ZW_weight_ss, tab$OverallZScore.eqtlgen_ng, n = 105)

# Columns are referenced by bare name inside aes(); `tab$...` there triggers
# ggplot2's "Use of `tab$...` is discouraged" warning.
ggplot(tab) +
  geom_point(aes(x = ZW_weight_ss, y = OverallZScore.eqtlgen_ng, color = density)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "grey")) +
  labs(y = "ZScore from eQTLGen", x = "Weighted ZScore (SS)") +
  scale_color_viridis()

## Warning: Use of `tab$ZW_weight_ss` is discouraged. Use `ZW_weight_ss` instead.

## Warning: Use of `tab$OverallZScore.eqtlgen_ng` is discouraged. Use
## `OverallZScore.eqtlgen_ng` instead.

# The colour must reflect the point density of THIS scatter (SE-weighted vs
# eQTLGen); `tab$density` was computed for the SS-weighted Z-scores.
tab$density_SE <- get_density(tab$ZW_weight_SE, tab$OverallZScore.eqtlgen_ng, n = 105)

# Columns are referenced by bare name inside aes(); `tab$...` there triggers
# ggplot2's "Use of `tab$...` is discouraged" warning.
ggplot(tab) +
  geom_point(aes(x = ZW_weight_SE, y = OverallZScore.eqtlgen_ng, color = density_SE)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "grey")) +
  labs(y = "ZScore from eQTLGen", x = "Weighted ZScore (SE)", color = "density") +
  scale_color_viridis()

## Warning: Use of `tab$ZW_weight_SE` is discouraged. Use `ZW_weight_SE` instead.
## Use of `tab$OverallZScore.eqtlgen_ng` is discouraged. Use `OverallZScore.eqtlgen_ng` instead.

WMA estimate for pairwise weighting

path_wma_pairwise <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/ALL/'
datset_combination = 'ng_vs_stemi_v2'

# Load the pairwise WMA summary for one dataset combination and rank the weights.
#
# path_wma_pairwise   directory (with trailing slash) holding `ALL_<combination>.tsv`
# datset_combination  combination id, e.g. 'ng_vs_stemi_v2'
#
# Returns the table sorted by decreasing `Bonf_genes`, then decreasing
# `Bonf_TP`, with two added columns:
#   rank_genes  position in that ordering (1 = best; ties keep file order)
#   weight      `<ss>_<row name with all digits removed>`
tab_prep <- function(path_wma_pairwise, datset_combination) {
  pairwise_tab <- read.table(
    paste0(path_wma_pairwise, 'ALL_', datset_combination, '.tsv'),
    sep = '\t', header = TRUE
  )
  # Both keys are sorted in decreasing order, so a scalar `decreasing` is
  # enough; a vector is only accepted by order(method = "radix").
  pairwise_tab <- pairwise_tab[order(pairwise_tab$Bonf_genes,
                                     pairwise_tab$Bonf_TP,
                                     decreasing = TRUE), ]
  pairwise_tab$rank_genes <- seq_len(nrow(pairwise_tab))
  pairwise_tab$weight <- paste(pairwise_tab$ss,
                               gsub('[0-9]+', '', rownames(pairwise_tab)),
                               sep = '_')
  pairwise_tab
}

Within chemistries

# Ranked weight tables for three dataset pairs; each display keeps columns
# 1, 2, 4, 6, 10 and 11 of the tab_prep() output.
ng_vs_stemi_v2 <- tab_prep(path_wma_pairwise, datset_combination)

datatable(
  ng_vs_stemi_v2[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

ng_vs_v2_1m <- tab_prep(path_wma_pairwise, datset_combination = 'ng_vs_v2_1m')

datatable(
  ng_vs_v2_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

stemi_v3_vs_v3_1m <- tab_prep(path_wma_pairwise, datset_combination = 'stemi_v3_vs_v3_1m')

datatable(
  stemi_v3_vs_v3_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

# Aggregate the per-comparison ranks of every weight: one column per dataset
# comparison (named after it, instead of merge()'s rank_genes.x / .y suffixes)
# plus their sum; the lowest aggregated rank is best.
rank_tables <- list(ng_vs_stemi_v2 = ng_vs_stemi_v2,
                    ng_vs_v2_1m = ng_vs_v2_1m,
                    stemi_v3_vs_v3_1m = stemi_v3_vs_v3_1m)
rank_tables <- Map(
  function(pair_tab, pair_id) {
    setNames(pair_tab[, c('weight', 'rank_genes')], c('weight', pair_id))
  },
  rank_tables, names(rank_tables)
)
inter_chemistries_agr_rank <- Reduce(
  function(left, right) merge(left, right, by = 'weight'),
  rank_tables
)
# fixed = TRUE: strip the literal '.tsv' (an unescaped '.' matches any character).
inter_chemistries_agr_rank$weight <- gsub('.tsv', '', inter_chemistries_agr_rank$weight, fixed = TRUE)
rownames(inter_chemistries_agr_rank) <- inter_chemistries_agr_rank$weight
inter_chemistries_agr_rank$weight <- NULL
inter_chemistries_agr_rank$agregated_rank <- rowSums(inter_chemistries_agr_rank)
inter_chemistries_agr_rank <- inter_chemistries_agr_rank[order(inter_chemistries_agr_rank$agregated_rank), ]

datatable(inter_chemistries_agr_rank, extensions = "Buttons",
          options = list(paging = TRUE,
                         scrollX = TRUE,
                         searching = TRUE,
                         ordering = TRUE,
                         dom = 'Bfrtip',
                         buttons = c('copy', 'csv', 'excel', 'pdf')))

Between chemistries

# Ranked weight tables for three v3-vs-v2 dataset pairs; each display keeps
# columns 1, 2, 4, 6, 10 and 11 of the tab_prep() output.
stemi_v3_vs_stemi_v2 <- tab_prep(path_wma_pairwise, datset_combination = 'stemi_v3_vs_stemi_v2')

datatable(
  stemi_v3_vs_stemi_v2[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

ng_vs_v3_1m <- tab_prep(path_wma_pairwise, datset_combination = 'ng_vs_v3_1m')

datatable(
  ng_vs_v3_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

v3_1m_vs_v2_1m <- tab_prep(path_wma_pairwise, datset_combination = 'v3_1m_vs_v2_1m')

datatable(
  v3_1m_vs_v2_1m[, c(1, 2, 4, 6, 10, 11)],
  extensions = "Buttons",
  options = list(buttons = c('copy', 'csv', 'excel', 'pdf'))
)

Getting aggregated rank

# Aggregate the per-comparison ranks of every weight: one column per dataset
# comparison (named after it, instead of merge()'s rank_genes.x / .y suffixes)
# plus their sum; the lowest aggregated rank is best.
rank_tables <- list(v3_1m_vs_v2_1m = v3_1m_vs_v2_1m,
                    ng_vs_v3_1m = ng_vs_v3_1m,
                    stemi_v3_vs_stemi_v2 = stemi_v3_vs_stemi_v2)
rank_tables <- Map(
  function(pair_tab, pair_id) {
    setNames(pair_tab[, c('weight', 'rank_genes')], c('weight', pair_id))
  },
  rank_tables, names(rank_tables)
)
between_chemistries_agr_rank <- Reduce(
  function(left, right) merge(left, right, by = 'weight'),
  rank_tables
)
# fixed = TRUE: strip the literal '.tsv' (an unescaped '.' matches any character).
between_chemistries_agr_rank$weight <- gsub('.tsv', '', between_chemistries_agr_rank$weight, fixed = TRUE)
rownames(between_chemistries_agr_rank) <- between_chemistries_agr_rank$weight
between_chemistries_agr_rank$weight <- NULL
between_chemistries_agr_rank$agregated_rank <- rowSums(between_chemistries_agr_rank)
between_chemistries_agr_rank <- between_chemistries_agr_rank[order(between_chemistries_agr_rank$agregated_rank), ]

datatable(between_chemistries_agr_rank, extensions = "Buttons",
          options = list(paging = TRUE,
                         scrollX = TRUE,
                         searching = TRUE,
                         ordering = TRUE,
                         dom = 'Bfrtip',
                         buttons = c('copy', 'csv', 'excel', 'pdf')))

Grid search

functions

dataset_combination <- 'ng_vs_stemi_v2'
grid_path <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/grid_search/'

# Heatmap of the grid-search summary for one dataset combination.
#
# grid_path            directory (with trailing slash) holding `Grid_summary_<combination>.tsv`
# dataset_combination  combination id, e.g. 'ng_vs_stemi_v2'
#
# Rows of the heatmap are the remaining columns of the summary table, columns
# are the distinct `grid` values in increasing order; values are scaled per
# column and no dendrograms are drawn. Returns heatmap()'s value invisibly.
heatmap_fun <- function(grid_path, dataset_combination) {
  summary_file <- paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv')
  grid_summary <- read.table(summary_file, sep = '\t', header = TRUE)
  grid_summary$param <- NULL

  # One row per grid value, sorted, with the grid value as row name.
  grid_summary <- grid_summary[order(grid_summary$grid), ]
  grid_summary <- grid_summary[!duplicated(grid_summary$grid), ]
  rownames(grid_summary) <- paste(grid_summary$grid)
  grid_summary$grid <- NULL

  invisible(heatmap(t(as.matrix(grid_summary)),
                    Colv = NA, Rowv = NA, scale = "column",
                    main = paste(dataset_combination, 'Grid search')))
}

 library(knitr)

# Print the best grid values for the sample-size (SS) weighting and the best
# (weight, grid value) pairs overall, as two markdown tables.
#
# grid_path            directory (with trailing slash) holding `Grid_summary_<combination>.tsv`
# dataset_combination  combination id, e.g. 'ng_vs_stemi_v2'
#
# Table 1: the 5 grid values with the highest `SS` count.
# Table 2: the 6 (weight, grid) pairs with the highest count over all weights.
show_most_optimal_weight <- function(grid_path, dataset_combination) {
  grid_tab <- read.table(paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv'),
                         sep = '\t', header = TRUE)
  grid_tab$param <- NULL
  grid_tab <- grid_tab[order(grid_tab$SS, decreasing = TRUE), ]
  ss_top <- head(grid_tab[, c('grid', 'SS')], 5)

  # Every column except `grid` is a weighting; selecting them by name avoids
  # hard-coding how many weightings the file contains.
  grid_tab_long <- gather(grid_tab, weight, neQTLs, -grid, factor_key = TRUE)
  grid_tab_long <- grid_tab_long[order(grid_tab_long$neQTLs, decreasing = TRUE), ]
  overall_top <- head(grid_tab_long, 6)

  # The caption belongs to kable(); passed to print() it is silently ignored.
  print(kable(ss_top, caption = paste(dataset_combination, 'SS-based WMA')))
  print(kable(overall_top,
              caption = paste(dataset_combination, 'Top grid with other parameters')))
}

#show_most_optimal_weight(grid_path, dataset_combination)

library(dplyr)
library(ggplot2)
library(tidyr)

# Line plot of the number of eQTLs along the grid for the sample-size (SS)
# weighting and the five weightings with the highest mean count.
#
# grid_path            directory (with trailing slash) holding `Grid_summary_<combination>.tsv`
# dataset_combination  combination id, e.g. 'ng_vs_stemi_v2'
#
# Means are computed over all distinct grid values; only grid values below 10
# are plotted. Returns the ggplot object.
line_fun <- function(grid_path, dataset_combination) {
  grid_tab <- read.table(paste0(grid_path, 'Grid_summary_', dataset_combination, '.tsv'),
                         sep = '\t', header = TRUE)
  grid_tab$param <- NULL
  grid_tab <- grid_tab[order(grid_tab$grid), ]
  grid_tab <- grid_tab[!duplicated(grid_tab$grid), ]

  # Rank only the candidate weightings: `grid` is the x-axis and `SS` is the
  # baseline that is always shown, so neither may take one of the five slots.
  candidate_cols <- setdiff(colnames(grid_tab), c('grid', 'SS'))
  mean_neqtls <- colMeans(grid_tab[, candidate_cols, drop = FALSE])
  top_weights <- head(candidate_cols[order(mean_neqtls, decreasing = TRUE)], 5)

  grid_tab <- grid_tab[grid_tab$grid < 10, ]
  # Keep the file's column order so the legend order stays stable.
  plot_cols <- colnames(grid_tab)[colnames(grid_tab) %in% c(top_weights, 'SS', 'grid')]
  plot_tab <- grid_tab[, plot_cols, drop = FALSE]

  # Gather everything except `grid` rather than a fixed column range.
  grid_tab_long <- gather(plot_tab, weight, neQTLs, -grid, factor_key = TRUE)

  ggplot(grid_tab_long, aes(x = grid, y = neQTLs, group = weight, color = weight)) +
    geom_line() +
    theme_light() +
    ggtitle(paste(dataset_combination, "top 5 weights and Sample size"))
}

results

dataset_combination = 'ng_vs_stemi_v2'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   |      grid|  SS|
## |:--|---------:|---:|
## |3  | 0.0222222| 147|
## |4  | 0.0285714| 147|
## |5  | 0.0400000| 147|
## |2  | 0.0181818| 146|
## |6  | 0.0666667| 120|
## 
## 
## |    |      grid|weight                  | neQTLs|
## |:---|---------:|:-----------------------|------:|
## |274 | 0.1250000|grid_variance.donor     |    149|
## |275 | 0.1333333|grid_variance.donor     |    149|
## |276 | 0.1428571|grid_variance.donor     |    149|
## |277 | 0.1538462|grid_variance.donor     |    149|
## |626 | 0.1250000|grid_variance_log.donor |    149|
## |627 | 0.1333333|grid_variance_log.donor |    149|

line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_stemi_v3'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   |      grid|  SS|
## |:--|---------:|---:|
## |2  | 0.0181818| 160|
## |3  | 0.0222222| 160|
## |7  | 0.1000000| 145|
## |8  | 0.1052632| 145|
## |9  | 0.1111111| 145|
## 
## 
## |     |      grid|weight                  | neQTLs|
## |:----|---------:|:-----------------------|------:|
## |1872 | 0.2083333|grid_inv_variance.donor |    183|
## |1873 | 0.2173913|grid_inv_variance.donor |    183|
## |1874 | 0.2272727|grid_inv_variance.donor |    183|
## |1875 | 0.2380952|grid_inv_variance.donor |    183|
## |1861 | 0.2500000|grid_inv_variance.donor |    182|
## |1869 | 0.1818182|grid_inv_variance.donor |    182|

line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_v2_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   | grid|  SS|
## |:--|----:|---:|
## |53 |  2.0| 378|
## |47 |  1.4| 375|
## |55 |  2.4| 375|
## |56 |  2.6| 375|
## |54 |  2.2| 374|
## 
## 
## |     | grid|weight                   | neQTLs|
## |:----|----:|:------------------------|------:|
## |308  | 15.0|grid_variance.donor      |    389|
## |660  | 15.0|grid_variance_log.donor  |    389|
## |2560 |  3.0|grid_mean_cell_L_sd_cell |    389|
## |103  |  3.6|grid_mean.donor          |    388|
## |104  |  3.8|grid_mean.donor          |    388|
## |108  |  4.0|grid_mean.donor          |    388|

line_fun(grid_path,dataset_combination)

dataset_combination = 'ng_vs_v3_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   |      grid|  SS|
## |:--|---------:|---:|
## |40 | 0.7692308| 398|
## |41 | 0.8333333| 398|
## |46 | 1.3000000| 393|
## |47 | 1.4000000| 392|
## |48 | 1.5000000| 392|
## 
## 
## |     | grid|weight                          | neQTLs|
## |:----|----:|:-------------------------------|------:|
## |2115 |  1.3|grid_mean_donor_L_variance_cell |    400|
## |2119 |  1.1|grid_mean_donor_L_variance_cell |    400|
## |2120 |  1.2|grid_mean_donor_L_variance_cell |    400|
## |708  |  1.4|grid_cv_log.donor               |    399|
## |711  |  1.1|grid_cv_log.donor               |    399|
## |712  |  1.2|grid_cv_log.donor               |    399|

line_fun(grid_path,dataset_combination)

dataset_combination = 'v3_1m_vs_stemi_v2'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   |      grid|  SS|
## |:--|---------:|---:|
## |38 | 0.6666667| 135|
## |39 | 0.7142857| 135|
## |40 | 0.7692308| 134|
## |42 | 0.9090909| 133|
## |41 | 0.8333333| 132|
## 
## 
## |     |      grid|weight                   | neQTLs|
## |:----|---------:|:------------------------|------:|
## |409  | 0.1250000|grid_cv.donor            |    137|
## |431  | 0.1176471|grid_cv.donor            |    137|
## |3257 | 0.6666667|grid_sd_prop_L_mean_cell |    136|
## |1    | 0.6666667|SS                       |    135|
## |2    | 0.7142857|SS                       |    135|
## |430  | 0.1111111|grid_cv.donor            |    135|

dataset_combination = 'v3_1m_vs_stemi_v3'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   |      grid| SS|
## |:--|---------:|--:|
## |2  | 0.0181818| 87|
## |3  | 0.0222222| 87|
## |4  | 0.0285714| 87|
## |5  | 0.0400000| 87|
## |6  | 0.0666667| 87|
## 
## 
## |     |      grid|weight                 | neQTLs|
## |:----|---------:|:----------------------|------:|
## |92   | 0.0400000|grid_mean.donor        |    114|
## |179  | 0.0285714|grid_sd.donor          |    114|
## |444  | 0.0400000|grid_mean_log.donor    |    114|
## |531  | 0.0285714|grid_sd_log.donor      |    114|
## |2906 | 0.0222222|grid_mean_cell_sd_prop |    114|
## |2907 | 0.0285714|grid_mean_cell_sd_prop |    114|

line_fun(grid_path,dataset_combination)

dataset_combination = 'v3_1m_vs_v2_1m'
heatmap_fun(grid_path,dataset_combination)

show_most_optimal_weight(grid_path,dataset_combination)

## 
## 
## |   | grid|  SS|
## |:--|----:|---:|
## |60 |  3.4| 445|
## |59 |  3.2| 444|
## |58 |  3.0| 442|
## |57 |  2.8| 438|
## |56 |  2.6| 437|
## 
## 
## |     | grid|weight                  | neQTLs|
## |:----|----:|:-----------------------|------:|
## |2985 |   95|grid_mean_cell_sd_prop  |    456|
## |2987 |   95|grid_mean_cell_sd_prop  |    456|
## |3161 |   95|grid_mean_donor_sd_prop |    456|
## |3163 |   95|grid_mean_donor_sd_prop |    456|
## |217  |   10|grid_sd.donor           |    455|
## |569  |   10|grid_sd_log.donor       |    455|

line_fun(grid_path,dataset_combination)

WMA on subset of genes

For genes with high GC top three optimal WMA weights are XXX, XXX, XXX.

path_summary <- '/Users/korshe/Documents/Data_Groningen/pbmc_pairwise_2022/subset_of_genes/'
dataset_combination <- 'v2_1m_vs_v3_1m'
parameter <- 'length'

# Shared implementation of top_param_high() / top_param_low().
#
# Reads `1_Total_summary_<combination>Sample.size.<parameter>.donor_<file_tag>.tsv`
# (first column: metric names, remaining columns: one per weighting) and
# builds a table with the five weightings that give the most BH-significant
# eQTLs, followed by the sample-size baseline ('Sample.size.') and the
# standard-error baseline ('SE', the BH_SE_* / cor_SE_* metrics of the
# 'Sample.size.' column shown under the SS column names).
top_param_table <- function(path_summary, dataset_combination, parameter,
                            file_tag, level_label) {
  ss_cols <- c('BH_SS_genes', 'BH_SS_eQTLs', 'cor_SS_all', 'cor_SS_sign')
  se_cols <- c('BH_SE_genes', 'BH_SE_eQTLs', 'cor_SE_all', 'cor_SE_sign')
  baseline <- 'Sample.size.'

  summary_tab <- read.table(
    paste0(path_summary, '1_Total_summary_', dataset_combination,
           'Sample.size.', parameter, '.donor_', file_tag, '.tsv'),
    sep = '\t', header = TRUE
  )

  metric_rows <- match(c(ss_cols, se_cols), summary_tab[[1]])
  if (anyNA(metric_rows)) {
    stop('Summary file lacks metric(s): ',
         paste(c(ss_cols, se_cols)[is.na(metric_rows)], collapse = ', '),
         call. = FALSE)
  }

  # Transposing the whole table turns every value into text, which makes
  # order() sort lexically ("9" > "19"). Extract the needed metrics as numbers.
  metric_values <- vapply(
    summary_tab[metric_rows, -1, drop = FALSE],
    function(column) as.numeric(as.character(column)),
    numeric(length(metric_rows))
  )
  weights_tab <- as.data.frame(t(metric_values))
  colnames(weights_tab) <- c(ss_cols, se_cols)

  if (!baseline %in% rownames(weights_tab)) {
    stop("Summary file has no '", baseline, "' column", call. = FALSE)
  }

  ranked <- weights_tab[order(weights_tab$BH_SS_eQTLs, decreasing = TRUE), ]
  top_weights <- head(ranked, 5)[, ss_cols]

  ss_row <- ranked[baseline, ss_cols]
  se_row <- setNames(ranked[baseline, se_cols], ss_cols)
  rownames(se_row) <- 'SE'

  # Append the SS baseline only when it is not already among the top five;
  # otherwise rbind() would add a duplicate 'Sample.size.1' row.
  result <- if (baseline %in% rownames(top_weights)) {
    rbind(top_weights, se_row)
  } else {
    rbind(top_weights, ss_row, se_row)
  }

  knitr::kable(result,
               caption = paste(dataset_combination,
                               'WMA of subset of genes with', level_label,
                               parameter))
}

# Top weightings for the subset of genes with a HIGH value of `parameter`
# (e.g. 'length', 'gc', 'mean', 'sd'). Returns a knitr::kable table.
top_param_high <- function(path_summary, dataset_combination, parameter){
  # 'HIGHT' is the spelling used in the file names on disk.
  top_param_table(path_summary, dataset_combination, parameter,
                  file_tag = 'HIGHT', level_label = 'high')
}

# Top weightings for the subset of genes with a LOW value of `parameter`.
# Returns a knitr::kable table.
top_param_low <- function(path_summary, dataset_combination, parameter){
  top_param_table(path_summary, dataset_combination, parameter,
                  file_tag = 'LOW', level_label = 'low')
}

1M v2 & v3

top_param_high(path_summary, dataset_combination, parameter='length')

v2_1m_vs_v3_1m WMA of subset of genes with high length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_sd_donor	12.0000000	105.0000000	0.3830421	0.5124326
mean_cell_L_sd_donor	12.0000000	105.0000000	0.3827140	0.5114275
mean_cell_L_sd_prop	12.0000000	105.0000000	0.3827140	0.5114275
mean_donor_L_sd_prop	12.0000000	105.0000000	0.3830421	0.5124326
sd_prop	13.0000000	102.0000000	0.3849117	0.5409461
Sample.size.	13.0000000	101.0000000	0.3912639	0.5482947
SE	12.0000000	102.0000000	0.3884130	0.5626165

top_param_low(path_summary, dataset_combination, parameter='length')

v2_1m_vs_v3_1m WMA of subset of genes with low length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_cell_L_sd_cell	17.0000000	182.0000000	0.3446227	0.8886137
mean_donor_L_sd_cell	17.0000000	181.0000000	0.3461420	0.8884718
sd_log.cell	17.0000000	165.0000000	0.3336581	0.8836933
sd.cell	16.0000000	164.0000000	0.3303284	0.8789485
avr_SD_donor	16.0000000	161.0000000	0.3419979	0.8676182
Sample.size.	15.0000000	141.0000000	0.3636316	0.9043358
SE	13.0000000	131.0000000	0.3693670	0.9068768

top_param_high(path_summary, dataset_combination, parameter='gc')

v2_1m_vs_v3_1m WMA of subset of genes with high gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
sd_prop	9.0000000	40.0000000	0.3189122	0.9176581
zeros_log.cell	8.0000000	39.0000000	0.3247360	0.9213615
Sample.size.	7.0000000	37.0000000	0.3268219	0.9501444
mean_donor_L_varience_donor	7.0000000	35.0000000	0.2969538	0.8942915
mean_cell_L_variance_donor	7.0000000	35.0000000	0.2967622	0.8946662
Sample.size.1	7.0000000	37.0000000	0.3268219	0.9501444
SE	3.0000000	5.0000000	0.3269741	0.9481789

top_param_low(path_summary, dataset_combination, parameter='gc')

v2_1m_vs_v3_1m WMA of subset of genes with low gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
cv_log.cell	48.0000000	1418.0000000	0.5107185	0.8048854
mean_cell_L_varience_cell	47.0000000	1415.0000000	0.5125610	0.8121256
mean_donor_L_variance_cell	47.0000000	1399.0000000	0.5122781	0.8159857
cv.cell	48.0000000	1360.0000000	0.5036711	0.8115379
cv_log.donor	47.0000000	1250.0000000	0.4996034	0.8399226
Sample.size.	49.0000000	1238.0000000	0.5167595	0.8462178
SE	48.0000000	1385.0000000	0.5138727	0.8058381

top_param_high(path_summary, dataset_combination, parameter='mean')

v2_1m_vs_v3_1m WMA of subset of genes with high mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
variance.cell	26.0000000	489.0000000	0.2588198	0.4702910
variance_log.cell	24.0000000	481.0000000	0.2563483	0.4639215
mean_log.cell	23.0000000	474.0000000	0.2598463	0.4724762
mean_log.donor	23.0000000	472.0000000	0.2598666	0.4717395
mean.donor	23.0000000	461.0000000	0.2653893	0.4956494
Sample.size.	21.0000000	415.0000000	0.2412562	0.4468668
SE	23.0000000	429.0000000	0.2338139	0.4278087

top_param_low(path_summary, dataset_combination, parameter='mean')

v2_1m_vs_v3_1m WMA of subset of genes with low mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
cv_log.donor	4.0000000	19.0000000	0.2364163	0.9426668
mean_cell_L_varience_cell	4.0000000	19.0000000	0.2682027	0.9398309
cv_log.cell	4.0000000	16.0000000	0.2670292	0.8773096
Sample.size.	3.0000000	15.0000000	0.2939085	0.7949185
zeros_log.cell	3.0000000	15.0000000	0.2878997	0.7890365
Sample.size.1	3.0000000	15.0000000	0.2939085	0.7949185
SE	3.0000000	11.0000000	0.2792001	0.8228778

top_param_high(path_summary, dataset_combination, parameter='sd')

v2_1m_vs_v3_1m WMA of subset of genes with high sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_sd_donor	21.0000000	396.0000000	0.3469961	0.6797311
mean_cell_L_sd_donor	21.0000000	396.0000000	0.3468789	0.6798414
mean_cell_L_sd_prop	21.0000000	396.0000000	0.3468789	0.6798414
mean_donor_L_sd_prop	21.0000000	396.0000000	0.3469961	0.6797311
mean_log.donor	18.0000000	394.0000000	0.3392858	0.6630351
Sample.size.	19.0000000	367.0000000	0.3383879	0.6794109
SE	22.0000000	373.0000000	0.3315468	0.6615580

top_param_low(path_summary, dataset_combination, parameter='sd')

v2_1m_vs_v3_1m WMA of subset of genes with low sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
Sample.size.	0.0000000	0.0000000	0.2464812	0.0000000
mean.donor	0.0000000	0.0000000	0.3289821	0.0000000
sd.donor	0.0000000	0.0000000	0.3271387	0.0000000
variance.donor	0.0000000	0.0000000	0.3314561	0.0000000
cv.donor	0.0000000	0.0000000	0.1657964	0.0000000
Sample.size.1	0.0000000	0.0000000	0.2464812	0.0000000
SE	0.0000000	0.0000000	0.2271203	0.0000000

NG v2 & 1M v3

dataset_combination ='ng_vs_v3_1m'
top_param_high(path_summary, dataset_combination, parameter='length')

ng_vs_v3_1m WMA of subset of genes with high length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
Sample.size.	9.0000000	48.0000000	0.3317558	0.9455553
zeros.cell	9.0000000	48.0000000	0.3339620	0.9428702
zeros_log.cell	9.0000000	48.0000000	0.3322678	0.9454301
sd_prop	9.0000000	47.0000000	0.3267121	0.9465538
mean_donor_L_variance_cell	9.0000000	46.0000000	0.3240715	0.9469643
Sample.size.1	9.0000000	48.0000000	0.3317558	0.9455553
SE	9.0000000	47.0000000	0.3287993	0.9491593

top_param_low(path_summary, dataset_combination, parameter='length')

ng_vs_v3_1m WMA of subset of genes with low length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_log.donor	11.0000000	79.0000000	0.2617560	0.8558849
sd.cell	12.0000000	78.0000000	0.2748296	0.8664076
mean_log.cell	11.0000000	77.0000000	0.2614956	0.8463045
sd_log.cell	12.0000000	77.0000000	0.2771293	0.8672397
zeros.cell	12.0000000	76.0000000	0.2855283	0.8775576
Sample.size.	10.0000000	47.0000000	0.2994051	0.8635707
SE	11.0000000	54.0000000	0.3064150	0.8584953

top_param_high(path_summary, dataset_combination, parameter='gc')

ng_vs_v3_1m WMA of subset of genes with high gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
Sample.size.	4.0000000	4.0000000	0.3413259
mean.donor	4.0000000	4.0000000	0.3107390
sd.donor	4.0000000	4.0000000	0.3205555
cv.donor	4.0000000	4.0000000	0.3308767
mean_log.donor	4.0000000	4.0000000	0.3113296
Sample.size.1	4.0000000	4.0000000	0.3413259
SE	4.0000000	4.0000000	0.3472766

top_param_low(path_summary, dataset_combination, parameter='gc')

ng_vs_v3_1m WMA of subset of genes with low gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_sd_donor	35.0000000	949.0000000	0.4795445	0.7801512
mean_donor_L_sd_prop	35.0000000	949.0000000	0.4795445	0.7801512
mean_cell_L_sd_donor	35.0000000	946.0000000	0.4788480	0.7796632
mean_cell_L_sd_prop	35.0000000	946.0000000	0.4788480	0.7796632
zeros_log.cell	34.0000000	945.0000000	0.4729856	0.7750755
Sample.size.	33.0000000	944.0000000	0.4735239	0.7756094
SE	38.0000000	967.0000000	0.4708052	0.7702254

top_param_high(path_summary, dataset_combination, parameter='mean')

ng_vs_v3_1m WMA of subset of genes with high mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
inv_variance.cell	20.0000000	564.0000000	0.1882468	0.5218753
inv_sd.donor	20.0000000	525.0000000	0.1932009	0.5618353
mean_donor_L_varience_donor	19.0000000	519.0000000	0.1953610	0.5596007
mean_cell_L_variance_donor	18.0000000	505.0000000	0.1956957	0.5670303
cv.cell	19.0000000	490.0000000	0.1920465	0.5370476
Sample.size.	16.0000000	470.0000000	0.2157812	0.5490202
SE	18.0000000	472.0000000	0.2196725	0.5730657

top_param_low(path_summary, dataset_combination, parameter='mean')

ng_vs_v3_1m WMA of subset of genes with low mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean.donor	3.0000000	12.0000000	0.3216376	0.9309261
variance.donor	3.0000000	12.0000000	0.3284211	0.9298793
mean_log.donor	3.0000000	12.0000000	0.3216493	0.9309324
variance_log.donor	3.0000000	12.0000000	0.3284211	0.9298793
variance.cell	3.0000000	12.0000000	0.3151593	0.9303253
Sample.size.	2.0000000	6.0000000	0.3416197	0.9999045
SE	1.0000000	1.0000000	0.3438839	0.0000000

top_param_high(path_summary, dataset_combination, parameter='sd')

ng_vs_v3_1m WMA of subset of genes with high sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
cv_log.cell	24.0000000	684.0000000	0.3470991	0.7124770
cv.cell	26.0000000	678.0000000	0.3398562	0.7139733
inv_sd.cell	25.0000000	673.0000000	0.3427132	0.7215641
Sample.size.	25.0000000	672.0000000	0.3640568	0.7235983
mean_donor_L_varience_donor	26.0000000	666.0000000	0.3325521	0.7317342
Sample.size.1	25.0000000	672.0000000	0.3640568	0.7235983
SE	23.0000000	659.0000000	0.3659004	0.7381223

top_param_low(path_summary, dataset_combination, parameter='sd')

ng_vs_v3_1m WMA of subset of genes with low sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean.donor	3.0000000	12.0000000	0.3201567	0.9309261
variance.donor	3.0000000	12.0000000	0.3233939	0.9298793
mean_log.donor	3.0000000	12.0000000	0.3201662	0.9309324
variance_log.donor	3.0000000	12.0000000	0.3233939	0.9298793
variance.cell	3.0000000	12.0000000	0.3158997	0.9303253
Sample.size.	2.0000000	6.0000000	0.3263891	0.9999045
SE	1.0000000	1.0000000	0.3302035	0.0000000

#parameters <- c('gc','length','mean','sd')

NG v2 & STEMI v2

dataset_combination ='ng_vs_stemi_v2'
top_param_high(path_summary, dataset_combination, parameter='length')

ng_vs_stemi_v2 WMA of subset of genes with high length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
variance.donor	5.0000000	26.0000000	0.2432222	0.7875336
variance_log.donor	5.0000000	26.0000000	0.2432313	0.7875200
zeros.cell	5.0000000	26.0000000	0.2589024	0.8261392
cv_log.cell	5.0000000	26.0000000	0.2630279	0.9357856
mean_cell_L_varience_cell	5.0000000	26.0000000	0.2501131	0.8327964
Sample.size.	4.0000000	25.0000000	0.2625132	0.9304219
SE	5.0000000	26.0000000	0.2597212	0.8299394

top_param_low(path_summary, dataset_combination, parameter='length')

ng_vs_stemi_v2 WMA of subset of genes with low length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_varience_donor	3.0000000	8.0000000	0.2934226	0.9656829
mean_donor_L_sd_donor	3.0000000	8.0000000	0.3109583	0.9671971
mean_donor_L_sd_prop	3.0000000	8.0000000	0.3109583	0.9671971
cv.cell	2.0000000	6.0000000	0.3258072	0.9950748
cv_log.cell	2.0000000	6.0000000	0.3271891	0.9951192
Sample.size.	1.0000000	2.0000000	0.3230321	0.0000000
SE	2.0000000	3.0000000	0.3015067	0.0000000

top_param_high(path_summary, dataset_combination, parameter='gc')

ng_vs_stemi_v2 WMA of subset of genes with high gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
cv.cell	1.0000000	1.0000000	0.2527622
zeros.cell	1.0000000	1.0000000	0.2685889
zeros_log.cell	1.0000000	1.0000000	0.2703194
inv_variance.cell	1.0000000	1.0000000	0.2362931
inv_variance.donor	1.0000000	1.0000000	0.2219016
Sample.size.	0.0000000	0.0000000	0.2702047
SE	1.0000000	1.0000000	0.2627731

top_param_low(path_summary, dataset_combination, parameter='gc')

ng_vs_stemi_v2 WMA of subset of genes with low gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
inv_sd.donor	9.0000000	373.0000000	0.3511033	0.9427655
cv.cell	9.0000000	370.0000000	0.3426099	0.9429228
cv_log.donor	11.0000000	363.0000000	0.3321501	0.9400661
sd.cell	8.0000000	348.0000000	0.3302741	0.9543884
sd_log.cell	8.0000000	348.0000000	0.3323872	0.9538529
Sample.size.	8.0000000	315.0000000	0.3535123	0.9504660
SE	2.0000000	21.0000000	0.3570769	0.8130366

top_param_high(path_summary, dataset_combination, parameter='mean')

ng_vs_stemi_v2 WMA of subset of genes with high mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
sd_prop	26.0000000	566.0000000	0.1551807	0.4704803
inv_sd.donor	24.0000000	565.0000000	0.1546213	0.4687628
mean_cell_L_varience_cell	27.0000000	561.0000000	0.1615674	0.4762640
inv_sd.cell	25.0000000	561.0000000	0.1612236	0.4760984
avr_SD_donor	23.0000000	558.0000000	0.1525568	0.4704280
Sample.size.	22.0000000	472.0000000	0.1640899	0.5071582
SE	29.0000000	577.0000000	0.1601348	0.4726863

top_param_low(path_summary, dataset_combination, parameter='mean')

ng_vs_stemi_v2 WMA of subset of genes with low mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
variance.cell	0.000000e+00	0.000000e+00	9.385707e-02	0.000000e+00
variance_log.cell	0.000000e+00	0.000000e+00	9.387245e-02	0.000000e+00
mean_cell_L_sd_donor	1.0000000	3.0000000	0.1221018	0.0000000
mean_cell_L_sd_prop	1.0000000	3.0000000	0.1221018	0.0000000
Sample.size.	0.0000000	0.0000000	0.1566476	0.0000000
Sample.size.1	0.0000000	0.0000000	0.1566476	0.0000000
SE	0.0000000	0.0000000	0.1696128	0.0000000

top_param_high(path_summary, dataset_combination, parameter='sd')

ng_vs_stemi_v2 WMA of subset of genes with high sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_sd_donor	26.0000000	589.0000000	0.2857967	0.7401483
mean_donor_L_sd_prop	26.0000000	589.0000000	0.2857967	0.7401483
inv_sd.donor	24.0000000	588.0000000	0.2889890	0.7478492
inv_variance.donor	24.0000000	586.0000000	0.2826482	0.7318523
mean_donor_L_varience_donor	25.0000000	585.0000000	0.2770441	0.7296780
Sample.size.	21.0000000	492.0000000	0.3036785	0.7773670
SE	27.0000000	581.0000000	0.2943974	0.7225017

top_param_low(path_summary, dataset_combination, parameter='sd')

ng_vs_stemi_v2 WMA of subset of genes with low sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
mean_cell_L_sd_donor	1.0000000	3.0000000	0.1298505
mean_cell_L_sd_prop	1.0000000	3.0000000	0.1298505
Sample.size.	0.0000000	0.0000000	0.1505765
mean.donor	0.0000000	0.0000000	0.1595669
sd.donor	0.0000000	0.0000000	0.1633831
Sample.size.1	0.0000000	0.0000000	0.1505765
SE	0.0000000	0.0000000	0.1610945

NG v2 & 1M v2

dataset_combination ='ng_vs_v2_1m'
top_param_high(path_summary, dataset_combination, parameter='length')

ng_vs_v2_1m WMA of subset of genes with high length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
variance.cell	1.000000e+01	9.800000e+01	3.189260e-01	6.691013e-01
variance_log.cell	1.000000e+01	9.800000e+01	3.191818e-01	6.689081e-01
mean_cell_L_variance_donor	11.0000000	120.0000000	0.3419234	0.6858609
sd_log.cell	11.00000000	119.00000000	0.32801015	0.66921562
mean_donor_L_sd_donor	11.00000000	119.00000000	0.33417925	0.67343220
Sample.size.	9.00000000	115.00000000	0.33444227	0.67846350
SE	13.00000000	93.00000000	0.34774581	0.40433683

top_param_low(path_summary, dataset_combination, parameter='length')

ng_vs_v2_1m WMA of subset of genes with low length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean.donor	8.0000000	33.0000000	0.3498154	0.9096587
sd.donor	8.0000000	33.0000000	0.3437112	0.9094820
variance.donor	8.0000000	33.0000000	0.3156178	0.9196858
mean_log.donor	8.0000000	33.0000000	0.3501954	0.9095520
sd_log.donor	8.0000000	33.0000000	0.3438656	0.9094445
Sample.size.	6.0000000	15.0000000	0.3725321	0.9362740
SE	7.0000000	36.0000000	0.3832980	0.9399126

top_param_high(path_summary, dataset_combination, parameter='gc')

ng_vs_v2_1m WMA of subset of genes with high gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
Sample.size.	1.0000000	1.0000000	0.2958102
mean.donor	1.0000000	1.0000000	0.2929579
sd.donor	1.0000000	1.0000000	0.2881051
variance.donor	1.0000000	1.0000000	0.2715610
cv.donor	1.0000000	1.0000000	0.2884989
Sample.size.1	1.0000000	1.0000000	0.2958102
SE	1.0000000	1.0000000	0.2870843

top_param_low(path_summary, dataset_combination, parameter='gc')

ng_vs_v2_1m WMA of subset of genes with low gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
inv_variance.donor	28.0000000	632.0000000	0.4560842	0.8463953
inv_variance.cell	27.00000000	591.00000000	0.46198021	0.90272872
inv_sd.donor	29.00000000	589.00000000	0.46309785	0.86479291
cv.cell	28.00000000	586.00000000	0.46548434	0.86423005
inv_sd.cell	28.00000000	586.00000000	0.46435219	0.86425753
Sample.size.	22.0000000	521.0000000	0.4627987	0.8568584
SE	27.0000000	611.0000000	0.4701973	0.8768237

top_param_high(path_summary, dataset_combination, parameter='mean')

ng_vs_v2_1m WMA of subset of genes with high mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
cv.donor	25.0000000	472.0000000	0.1670159	0.4316799
cv_log.donor	25.0000000	472.0000000	0.1685668	0.4325850
cv.cell	24.0000000	472.0000000	0.1732926	0.4391895
sd_prop_L_mean_cell	25.0000000	472.0000000	0.1671250	0.4314487
inv_variance.cell	25.0000000	471.0000000	0.1753585	0.4618821
Sample.size.	21.0000000	457.0000000	0.1792590	0.4667053
SE	24.0000000	518.0000000	0.1870202	0.4226345

top_param_low(path_summary, dataset_combination, parameter='mean')

ng_vs_v2_1m WMA of subset of genes with low mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
Sample.size.	1.0000000	2.0000000	0.2993830
mean.donor	1.0000000	2.0000000	0.2958014
sd.donor	1.0000000	2.0000000	0.2963371
variance.donor	1.0000000	2.0000000	0.2887124
mean_log.donor	1.0000000	2.0000000	0.2958042
Sample.size.1	1.0000000	2.0000000	0.2993830
SE	1.0000000	2.0000000	0.3167615

top_param_high(path_summary, dataset_combination, parameter='sd')

ng_vs_v2_1m WMA of subset of genes with high sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
sd_prop_L_mean_cell	25.0000000	459.0000000	0.3381884	0.6322244
cv_log.donor	25.0000000	456.0000000	0.3397424	0.6358152
cv.donor	25.0000000	455.0000000	0.3381732	0.6379984
inv_variance.cell	24.0000000	450.0000000	0.3464770	0.6608746
inv_variance.donor	26.0000000	450.0000000	0.3475537	0.6586948
Sample.size.	22.0000000	443.0000000	0.3493605	0.6513082
SE	26.0000000	509.0000000	0.3638315	0.6446366

top_param_low(path_summary, dataset_combination, parameter='sd')

ng_vs_v2_1m WMA of subset of genes with low sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all
Sample.size.	1.0000000	2.0000000	0.2834610
mean.donor	1.0000000	2.0000000	0.2855699
sd.donor	1.0000000	2.0000000	0.2843085
variance.donor	1.0000000	2.0000000	0.2785106
mean_log.donor	1.0000000	2.0000000	0.2855714
Sample.size.1	1.0000000	2.0000000	0.2834610
SE	1.0000000	2.0000000	0.3063411

STEMI v3 & 1M v3

dataset_combination ='stemi_v3_vs_v3_1m'
top_param_high(path_summary, dataset_combination, parameter='length')

stemi_v3_vs_v3_1m WMA of subset of genes with high length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_L_variance_cell	5.0000000	26.0000000	0.3166268	0.6195197
mean_donor_L_sd_cell	5.0000000	26.0000000	0.3087805	0.7052360
zeros_log.cell	4.0000000	25.0000000	0.2868697	0.3418463
avr_SD_donor	4.0000000	25.0000000	0.2789740	0.3607516
mean_cell_L_varience_cell	4.0000000	25.0000000	0.2995581	0.3817209
Sample.size.	4.0000000	23.0000000	0.2805184	0.3170239
SE	2.0000000	12.0000000	0.3168434	-0.3777836

top_param_low(path_summary, dataset_combination, parameter='length')

stemi_v3_vs_v3_1m WMA of subset of genes with low length
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
zeros.cell	8.0000000	85.0000000	0.2474593	0.7900599
variance.donor	6.0000000	55.0000000	0.2689472	0.8955783
variance_log.donor	6.0000000	55.0000000	0.2689796	0.8955982
sd.donor	6.0000000	52.0000000	0.2643972	0.8907365
sd_log.donor	6.0000000	46.0000000	0.2643250	0.8825832
Sample.size.	1.0000000	5.0000000	0.2403568	0.7811554
SE	5.0000000	49.0000000	0.2304147	0.9725329

top_param_high(path_summary, dataset_combination, parameter='gc')

stemi_v3_vs_v3_1m WMA of subset of genes with high gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_sd_prop	6.0000000	44.0000000	0.2653018	0.9232801
Sample.size.	4.0000000	41.0000000	0.2154790	0.9158282
mean.cell	4.0000000	41.0000000	0.2154350	0.9214766
sd.cell	4.0000000	41.0000000	0.2080846	0.9150221
variance.cell	4.0000000	41.0000000	0.2004603	0.9141595
Sample.size.1	4.0000000	41.0000000	0.2154790	0.9158282
SE	0.0000000	0.0000000	0.2729563	0.0000000

top_param_low(path_summary, dataset_combination, parameter='gc')

stemi_v3_vs_v3_1m WMA of subset of genes with low gc
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
sd_prop_L_mean_cell	25.0000000	795.0000000	0.4212361	0.8525919
inv_sd.cell	24.0000000	776.0000000	0.4453924	0.8451554
inv_variance.cell	24.0000000	773.0000000	0.4460672	0.8399818
mean_cell_L_varience_cell	25.0000000	735.0000000	0.4555651	0.8645287
mean_donor_L_variance_cell	25.0000000	721.0000000	0.4759422	0.8568491
Sample.size.	12.0000000	166.0000000	0.4419067	0.8739260
SE	24.0000000	734.0000000	0.4678853	0.8054725

top_param_high(path_summary, dataset_combination, parameter='mean')

stemi_v3_vs_v3_1m WMA of subset of genes with high mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
variance.donor	2.000000e+00	3.000000e+01	2.247827e-01	9.809085e-02
zeros.cell	8.0000000	142.0000000	0.2767575	0.5049588
mean_donor_L_sd_donor	8.0000000	131.0000000	0.2909270	0.4776824
mean_donor_L_sd_prop	8.0000000	131.0000000	0.2909270	0.4776824
mean_donor_L_variance_cell	8.0000000	129.0000000	0.2784009	0.4515029
Sample.size.	1.0000000	29.0000000	0.2698677	-0.3928951
SE	5.0000000	101.0000000	0.2494954	0.3133391

top_param_low(path_summary, dataset_combination, parameter='mean')

stemi_v3_vs_v3_1m WMA of subset of genes with low mean
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
mean_donor_sd_prop	1.0000000	9.0000000	0.2772362	0.6512968
mean.donor	1.0000000	4.0000000	0.2671629	0.0000000
sd.donor	1.0000000	4.0000000	0.2611781	0.0000000
variance.donor	1.0000000	4.0000000	0.2785881	0.0000000
mean_log.donor	1.0000000	4.0000000	0.2671462	0.0000000
Sample.size.	0.0000000	0.0000000	0.2148669	0.0000000
SE	1.0000000	9.0000000	0.2422744	0.6512968

top_param_high(path_summary, dataset_combination, parameter='sd')

stemi_v3_vs_v3_1m WMA of subset of genes with high sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
zeros.cell	8.0000000	88.0000000	0.3626497	0.9362700
inv_variance.cell	7.0000000	79.0000000	0.3426494	0.9268460
mean_donor_L_variance_cell	8.0000000	78.0000000	0.3656688	0.9437620
mean_donor_L_sd_cell	7.0000000	74.0000000	0.3643666	0.9248328
mean_donor_L_varience_donor	6.0000000	72.0000000	0.3884618	0.9312092
Sample.size.	1.0000000	10.0000000	0.3403024	0.9992667
SE	7.0000000	82.0000000	0.3436007	0.9381732

top_param_low(path_summary, dataset_combination, parameter='sd')

stemi_v3_vs_v3_1m WMA of subset of genes with low sd
	BH_SS_genes	BH_SS_eQTLs	cor_SS_all	cor_SS_sign
Sample.size.	0.0000000	0.0000000	0.1644582	0.0000000
mean.donor	0.0000000	0.0000000	0.1866382	0.0000000
sd.donor	0.0000000	0.0000000	0.1918760	0.0000000
variance.donor	0.0000000	0.0000000	0.1953173	0.0000000
cv.donor	0.0000000	0.0000000	0.1573711	0.0000000
Sample.size.1	0.0000000	0.0000000	0.1644582	0.0000000
SE	0.0000000	0.0000000	0.1309707	0.0000000

WMA_pbmc

2022-08-05

Analyzing each dataset

WMA efficiency estimate 5ds

Testing if the differences between summary statistics are significant

Weights and weighting characteristics

WMA estimate for pairwise weighting

Between chemistries

Between chemistries

Getting aggregated rank

Grid search

functions

results

WMA on subset of genes

1M v2 & v3

NG v2 & 1M v3

NG v2 & STEMI v2

NG v2 & 1M v2

STEMI v3 & 1M v3