# install.packages("devtools")
# library("devtools")
# install_github("whz991026/AEEIP")

library(AEEIP)

load other libraries

Load the other packages:

0.1

simulate data

This section is getting the simulate data with 0.1 mix rate and replicates is 3

# the mix rate is 0.1
set.seed(1)
data <-simulateData(T=10000, hyper_a_1=20,hyper_b_1 =0.2, hyper_a_2=50,hyper_b_2 =0.05,
pure_rate=0.99,mix_rate=0.1,mode=2,replicates=3)

corrected the bias

replicates 1

corrected the bias for the replicates 1

set.seed(1)
# firstly we set the initial tau is near to 0 and let the size of beta to be large enough
# to see the plot
res <- AEEIP_test(r_t_pure=data[[3]][[1]],r_t_mix=data[[4]][[1]],beta=sum(data[[4]][[1]])*1e1,alpha_a=NA,
alpha_b=NA,tau_a_vector=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
max_iteration =1e6,stop_stepsize=1e-7)

res1 <- AEEIP_test_final(res,r_t_pure=data[[3]][[1]],r_t_mix=data[[4]][[1]],beta=sum(data[[4]][[1]])*1e1,alpha_a=NA,                      alpha_b=NA,tau_a=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
                     max_iteration =1e6,stop_stepsize=1e-7)
## [1] "the antibody bias is around 0.108844542801534"
res_replicate1 <- res1[[1]]
data.frame_1 <- res1[[2]]
data.frame_2 <- res1[[3]]


plot <- ggplot() + geom_point(data=data.frame_2,aes(x=tau_a_no_bayes,y=diff))+
     geom_hline(aes(yintercept=0), colour="#BB0000", linetype="dashed")

plot

res_replicate1$tau_a_bayes
## [1] 0.1088445

replicates 2

set.seed(1)
# firstly we set the initial tau is near to 0 and let the size of beta to be large enough,
# to see the plot
res <- AEEIP_test(r_t_pure=data[[3]][[2]],r_t_mix=data[[4]][[2]],beta=sum(data[[4]][[2]])*1e1,alpha_a=NA,
alpha_b=NA,tau_a_vector=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
max_iteration =1e6,stop_stepsize=1e-7)


res1 <- AEEIP_test_final(res,r_t_pure=data[[3]][[2]],r_t_mix=data[[4]][[2]],beta=sum(data[[4]][[2]])*1e1,alpha_a=NA,                      alpha_b=NA,tau_a=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
                     max_iteration =1e6,stop_stepsize=1e-7)
## [1] "the antibody bias is around 0.108819650014971"
res_replicate2 <- res1[[1]]
data.frame_1 <- res1[[2]]
data.frame_2 <- res1[[3]]


plot <- ggplot() + geom_point(data=data.frame_2,aes(x=tau_a_no_bayes,y=diff))+
     geom_hline(aes(yintercept=0), colour="#BB0000", linetype="dashed")

plot

res_replicate2$tau_a_bayes
## [1] 0.1088197

replicates 3

set.seed(1)
# firstly we set the initial tau is near to 0 and let the size of beta to be large enough,
# to see the plot
res <- AEEIP_test(r_t_pure=data[[3]][[3]],r_t_mix=data[[4]][[3]],beta=sum(data[[4]][[3]])*1e1,alpha_a=NA,
alpha_b=NA,tau_a_vector=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
max_iteration =1e6,stop_stepsize=1e-7)

res1 <- AEEIP_test_final(res,r_t_pure=data[[3]][[3]],r_t_mix=data[[4]][[3]],beta=sum(data[[4]][[3]])*1e1,alpha_a=NA,                      alpha_b=NA,tau_a=0,l=1,prop_vector=seq(0.01,0.99,by=0.01),
                     max_iteration =1e6,stop_stepsize=1e-7)
## [1] "the antibody bias is around 0.1088061616759"
res_replicate3 <- res1[[1]]
data.frame_1 <- res1[[2]]
data.frame_2 <- res1[[3]]

plot <- ggplot() + geom_point(data=data.frame_2,aes(x=tau_a_no_bayes,y=diff))+
     geom_hline(aes(yintercept=0), colour="#BB0000", linetype="dashed")

plot

res_replicate3$tau_a_bayes
## [1] 0.1088062

use the non_mix real data to run the DESeq2

use DESeq2 to analysis the real data, to see the differential between the input and IP.

    counts <- cbind(as.data.frame(data[[1]]),as.data.frame(data[[2]]))
    design = data.frame(
                    Trt = c(rep("input", dim(as.data.frame(data[[1]]))[2]),
                            rep("IP", dim(as.data.frame(data[[1]]))[2])))
    model = ~  Trt 
    # 
    dds <- DESeq2::DESeqDataSetFromMatrix(countData = counts,
                                colData = design,
                                design = model)
## converting counts to integer mode
    data_size <- as.matrix(counts)

    # first log
    log_data <- log(data_size)
    log_data [is.infinite(log_data)] <- NA
    log_mean <- rowMeans(log_data)
    log_s <- log_data-log_mean

    # then exp
    s_size <- exp(apply(log_s,2,function(x)median(x,na.rm=TRUE)))
    names(s_size) <- NULL
    dds$sizeFactor <- s_size
    # DESeq2::sizeFactors(dds) = s_size

    dds <- DESeq2::DESeq(dds, test = "Wald")
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
    res_DESeq2_real = results(dds)#, name = "Trt_IP_vs_input"
    
    
    res_DESeq2_real_01_1 <- res_DESeq2_real

use the mix data to run the DESeq2

use DESeq2 to analysis the mix data, to see the differential between the input and IP.

    counts <- cbind(as.data.frame(data[[3]]),as.data.frame(data[[4]]))
    design = data.frame(
                    Trt = c(rep("input", dim(as.data.frame(data[[3]]))[2]),
                            rep("IP", dim(as.data.frame(data[[3]]))[2])))
    model = ~  Trt 
    # 
    dds <- DESeqDataSetFromMatrix(countData = counts,
                                colData = design,
                                design = model)
## converting counts to integer mode
    data_size <- as.matrix(counts)
  
  
    # first log
    log_data <- log(data_size)
    log_data [is.infinite(log_data)] <- NA
    log_mean <- rowMeans(log_data)
    log_s <- log_data-log_mean
    
    # then exp
    s_size <- exp(apply(log_s,2,function(x)median(x,na.rm=TRUE)))
    names(s_size) <- NULL
    sizeFactors(dds) = s_size
    
    dds <- DESeq(dds, test = "Wald")
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
    res_DESeq2_mix = DESeq2::results(dds)
    
    res_DESeq2_mix_01_1 <- res_DESeq2_mix

use the corrected mix data to run the DESeq2

input <- as.data.frame(data[[3]])
IP <- as.data.frame(data[[4]])

res_DESeq2_mix_corrected <- DESeq2_AEEIP(list(res_replicate1,res_replicate2,res_replicate3),input,IP)
## converting counts to integer mode
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

evaluate

# we see the difference is small for the corrected mix data
 

# to see the i=relation between the three results
index_real <- which(res_DESeq2_real$pvalue<=5e-2)
threshold1 <- res_DESeq2_mix_corrected$pvalue[order(res_DESeq2_mix_corrected$pvalue)[length(index_real)]]
threshold2 <- res_DESeq2_mix$pvalue[order(res_DESeq2_mix$pvalue)[length(index_real)]]
index_mix_corrected <- which(res_DESeq2_mix_corrected$pvalue<=threshold1)
index_mix <- which(res_DESeq2_mix$pvalue<=threshold2)
 
index_list <- list(real=index_real,corrected=index_mix_corrected,
                   mix=index_mix)
 

ggVennDiagram(index_list,category.names = c("real","corrected","mix"),
              label = "count", 
              label_color = "black",
              label_alpha = 0,
              edge_lty = "dashed", 
              edge_size = 1) +
  scale_fill_gradient(low="white",high = "#b9292b",name = "gene count")

# we can see the intersection between real data and corrected mix data is better than the mix data.