library(ggplot2)
library(dplyr)
library(rstan)
#TABI
if("package:TABI" %in% search()) detach("package:TABI", unload=TRUE, force=TRUE); library(TABI)
Note: (True Positive Curve Shape was inspired by results/estimated fit from TABI run on gene NUSAP1).
#Create Generalised Sigmoid Function Equation
eq <- function(x) {
eta=2.29
beta=0.9
y_0=600
top<- (y_0*(1+exp(eta*beta)))
bottom<-(1+exp(eta*beta-x*beta))
return(top/bottom)
}
#For plotting add + 1 (prevent issues with log transformation)
eq1 <- function(x) {
eta=2.29
beta=0.9
y_0=600
top<- (y_0*(1+exp(eta*beta)))
bottom<-(1+exp(eta*beta-x*beta))
return(top/bottom+1)
}
#Plot function normal scale
ggplot(data.frame(x=c(0,8)), aes(x=x)) + stat_function(fun=eq1, geom="line") + labs(title="True Positive Curve", x="CAPRA-S", y="Normalised Gene Count")
#Plot Function log scale
ggplot(data.frame(x=c(0,8)), aes(x=x)) + stat_function(fun=eq1, geom="line") + scale_y_log10() + labs(title="True Positive Curve (Log Transformed Scale)", x="CAPRA-S", y="Normalised Gene Count")
#Distribution of CAPRA-S Scores
k<-0
for (i in 0:8) {
f<-PC_TCGA %>% filter(transcript=="NUSAP1")
k[i]<-sum(f$`CAPRA-S`==i, na.rm=T)}
k #Number of sample from Each CAPRA-S Score
## [1] 11 13 23 11 47 5 18 2
y<-sapply(0:7, FUN=eq) #Expected Value for each CAPRA_S score
#Take stardard normal (multiply by 500, giving sd of 500), then shift so that its mean value is the expected value
#A a particular CAPRA_S score, take n number of random samples from the resulting distribution
y_sim<-apply(array(c(y,k), dim=c(8,2)), MARGIN=1, FUN=function(x) { rnorm(n=x[2],mean(x[1]),sd=500)})
#Define corresponding CAPRA_S
CAPRA_S<-c(rep(0,11), rep(1,13), rep(2,23), rep(3,11), rep(4,47), rep(5,5), rep(6,18), rep(7,2))
#Resulting Distribution of Simulated Gene Counts
hist(as.integer(unlist(y_sim)), main="Distribution of Simulated Normalised Gene Counts")
data.frame(CAPRA_S, y=as.integer(unlist(y_sim)+1)) %>% ggplot(aes(y=y, x=CAPRA_S)) + stat_function(fun=eq1, geom="line", xlim=c(0,7)) + geom_point() + labs(title="Simulated Sigmoidal Curve Data Points (Without Log Transformation)", y="Normalised Gene Expression +1")
data.frame(CAPRA_S, y=as.integer(unlist(y_sim))+1) %>% ggplot(aes(y=y, x=CAPRA_S)) + geom_point() + scale_y_log10() + labs(title="Simulated Sigmoidal Curve Data Points (With Log Transformation", y="Normalised Gene Expression +1")
Test_TP<-data.frame(CAPRA_S, Gene=as.integer(unlist(y_sim))) %>% filter(Gene >=0)
TABI_TP<-TABI_glm(formula= ~CAPRA_S, #Include covariates with +
data= Test_TP, prop_DE =0.1, scale_DE = 5, model=rstan::stan_model("~/PhD/TABI/src/stan_files/DE_sigmoid.stan")
)
traceplot(TABI_TP$fit,inc_warmup=T)
plot_posterior(TABI_TP, CI=0.95)
Actual Values eta=2.29, beta=0.9, y_0=600 (log_y0 =6.3969297)
#Parameter Estimates
get_posterior_mean(TABI_TP$fit)[1:5,]
## mean-chain:1 mean-chain:2 mean-chain:3 mean-chain:4
## inflection[1] 1.995882 1.765083 1.307466 0.8801745
## log_y_cross[1] 8.086324 8.105540 8.123506 8.1122274
## log_y_cross_prior[1] 8.058585 8.058484 8.140402 7.9959179
## log_y_cross_prior[2] 3.430947 3.701990 3.971939 4.4078455
## beta1_z[1,1] 1.417468 1.518086 1.259078 1.2946329
## mean-all chains
## inflection[1] 1.487152
## log_y_cross[1] 8.106899
## log_y_cross_prior[1] 8.063347
## log_y_cross_prior[2] 3.878180
## beta1_z[1,1] 1.372316
plot(TABI_TP$fit, pars=c("inflection[1]", "log_y_cross[1]", "beta1_z[1,1]"), ci_level = 0.8, outer_level = 0.95) + ggtitle("Posterior Parameter Intervals")