Load Required Packages

library(ggplot2)
library(dplyr)
library(rstan)
#TABI
if("package:TABI" %in% search()) detach("package:TABI", unload=TRUE, force=TRUE); library(TABI)  

Test on False Positive Data

Note: (True Positive Curve Shape was inspired by results/estimated fit from TABI run on gene NUSAP1).

Create Simulation Data

Create Generalised Sigmoid Function Equation

#Create Generalised Sigmoid Function Equation
eq <- function(x) {
    eta=2.29
    beta=0.9
    y_0=600
    top<- (y_0*(1+exp(eta*beta)))
    bottom<-(1+exp(eta*beta-x*beta))
    return(top/bottom)
}

#For plotting  add + 1 (prevent issues with log transformation)
eq1 <- function(x) {
    eta=2.29
    beta=0.9
    y_0=600
    top<- (y_0*(1+exp(eta*beta)))
    bottom<-(1+exp(eta*beta-x*beta))
    return(top/bottom+1)
    }

#Plot function normal scale

ggplot(data.frame(x=c(0,8)), aes(x=x)) + stat_function(fun=eq1, geom="line")    + labs(title="True Positive Curve", x="CAPRA-S", y="Normalised Gene Count")

#Plot Function log scale
ggplot(data.frame(x=c(0,8)), aes(x=x)) + stat_function(fun=eq1, geom="line") + scale_y_log10()  + labs(title="True Positive Curve (Log Transformed Scale)", x="CAPRA-S", y="Normalised Gene Count")

Create number of samples expected for each CAPRA-S level

#Distribution of CAPRA-S Scores
k<-0
for (i in 0:8) {
f<-PC_TCGA %>% filter(transcript=="NUSAP1")
k[i]<-sum(f$`CAPRA-S`==i, na.rm=T)}
k #Number of sample from Each CAPRA-S Score
## [1] 11 13 23 11 47  5 18  2

Find expected value for each CAPRA-S level (i.e. using the defined equation above)

y<-sapply(0:7, FUN=eq) #Expected Value for each CAPRA_S score

Use these values to simulate test data

#Take stardard normal (multiply by 500, giving sd of 500), then shift so that its mean value is the expected value
#A a particular CAPRA_S score, take n number of random samples from the resulting distribution 
y_sim<-apply(array(c(y,k), dim=c(8,2)), MARGIN=1, FUN=function(x) { rnorm(n=x[2],mean(x[1]),sd=500)})
#Define corresponding CAPRA_S
CAPRA_S<-c(rep(0,11), rep(1,13), rep(2,23), rep(3,11), rep(4,47), rep(5,5), rep(6,18), rep(7,2))
#Resulting Distribution of Simulated Gene Counts
hist(as.integer(unlist(y_sim)), main="Distribution of Simulated Normalised Gene Counts")

Plot Simulated Data

data.frame(CAPRA_S, y=as.integer(unlist(y_sim)+1)) %>% ggplot(aes(y=y, x=CAPRA_S)) + stat_function(fun=eq1, geom="line", xlim=c(0,7)) + geom_point() + labs(title="Simulated Sigmoidal Curve Data Points (Without Log Transformation)", y="Normalised Gene Expression +1")

data.frame(CAPRA_S, y=as.integer(unlist(y_sim))+1) %>% ggplot(aes(y=y, x=CAPRA_S)) + geom_point() + scale_y_log10() + labs(title="Simulated Sigmoidal Curve Data Points (With Log Transformation", y="Normalised Gene Expression +1")

Run TABI on the simulated Data

Test_TP<-data.frame(CAPRA_S, Gene=as.integer(unlist(y_sim))) %>% filter(Gene >=0)

TABI_TP<-TABI_glm(formula= ~CAPRA_S, #Include covariates with + 
                                                 data= Test_TP, prop_DE =0.1, scale_DE = 5, model=rstan::stan_model("~/PhD/TABI/src/stan_files/DE_sigmoid.stan")
)

Posterior Diagnositc/Mixing Check

traceplot(TABI_TP$fit,inc_warmup=T)

Is the simulated gene detected as significantly associated with CAPRA-S?

plot_posterior(TABI_TP, CI=0.95)

What does TABI think are the values of the parameters?

Actual Values eta=2.29, beta=0.9, y_0=600 (log_y0 =6.3969297)

#Parameter Estimates
get_posterior_mean(TABI_TP$fit)[1:5,]
##                      mean-chain:1 mean-chain:2 mean-chain:3 mean-chain:4
## inflection[1]            1.995882     1.765083     1.307466    0.8801745
## log_y_cross[1]           8.086324     8.105540     8.123506    8.1122274
## log_y_cross_prior[1]     8.058585     8.058484     8.140402    7.9959179
## log_y_cross_prior[2]     3.430947     3.701990     3.971939    4.4078455
## beta1_z[1,1]             1.417468     1.518086     1.259078    1.2946329
##                      mean-all chains
## inflection[1]               1.487152
## log_y_cross[1]              8.106899
## log_y_cross_prior[1]        8.063347
## log_y_cross_prior[2]        3.878180
## beta1_z[1,1]                1.372316
plot(TABI_TP$fit, pars=c("inflection[1]", "log_y_cross[1]", "beta1_z[1,1]"), ci_level = 0.8, outer_level = 0.95) + ggtitle("Posterior Parameter Intervals")