Create the following getTotalTreeHeight function:
getTotalTreeHeight <- function(tree)
{return(tree$nodes[[1]]@time)}
L = 100000; N = 1e4; n = 20; mu = 1.2e-8
expection <- 4*N*(1-(1/n))
tree <- generateTree(n, N)
tmrca <- getTotalTreeHeight(tree)
cat("Expection:",expection,"\nReality:", tmrca,"\n")
## Expection: 38000
## Reality: 83421
seqMatrix <- throwDownMutations(tree$branches, n, mu, L)
cat("A sample of the sequence matrix...\n")
## A sample of the sequence matrix...
seqMatrix[1:20,1:13]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 1 1 1 0 0 1 1 0 0 0 0 0
## [2,] 1 0 0 1 0 0 0 0 1 0 0 0 0
## [3,] 1 0 0 1 0 0 0 0 0 0 0 0 0
## [4,] 1 1 1 1 0 0 1 1 0 0 0 0 0
## [5,] 1 0 0 1 0 1 0 0 0 0 0 1 0
## [6,] 1 0 0 1 0 0 0 0 0 0 0 1 0
## [7,] 1 0 0 1 0 0 0 0 0 0 0 1 0
## [8,] 1 0 1 1 0 0 1 0 0 0 0 0 0
## [9,] 1 1 1 1 0 0 1 1 0 0 0 0 0
## [10,] 1 0 0 1 0 0 0 0 0 0 0 1 0
## [11,] 1 1 1 1 0 0 1 1 0 0 0 0 0
## [12,] 1 0 1 1 1 0 1 0 0 0 0 0 1
## [13,] 1 0 0 1 0 0 0 0 1 0 0 0 0
## [14,] 1 1 1 1 0 0 1 1 0 0 0 0 0
## [15,] 1 0 0 1 0 1 0 0 0 0 0 1 0
## [16,] 1 0 0 1 0 0 0 0 1 0 0 0 0
## [17,] 0 0 0 0 0 0 0 0 0 1 1 0 0
## [18,] 1 0 1 1 0 0 1 0 0 0 0 0 0
## [19,] 1 0 0 1 0 1 0 0 0 0 0 1 0
## [20,] 1 0 0 1 0 0 0 0 0 0 0 1 0
Your Analysis here
ptm <- proc.time()
sfs <- as.data.frame(matrix())
for (i in 1:500){
tree <- generateTree(n, N)
tmrca <- getTotalTreeHeight(tree)
seqMatrix <- throwDownMutations(tree$branches, n, mu, L)
freq <- mean(colSums(seqMatrix))
sfs[i,] <- freq
}
cat("\nTiming of the inefficiency of this loop:\nTotal time to run: ")
##
## Timing of the inefficiency of this loop:
## Total time to run:
cat(proc.time()[3] - ptm[3],"seconds...")
## 21.346 seconds...
ggplot(sfs, aes(x=V1)) +
geom_histogram(aes(y=..density..), binwidth=.15, color="gray18",fill="white")+ geom_density(alpha=.2, fill="red",size=.5, color="gray14") + ggtitle("Site Frequency Spectrum,n=500\nSmaller Bins")+
scale_x_continuous(limits = c(min(sfs)-.5, max(sfs)+.5))+
gghisto+xlab("Mean Allele Frequencies \nAcrossTrials")

ggplot(sfs, aes(x=V1)) +
geom_histogram(aes(y=..density..), binwidth=.35, color="gray18",fill="white")+ geom_density(alpha=.2, fill="red",size=.5, color="gray14") + ggtitle("Site Frequency Spectrum,n=500\nLarger Bins")+
scale_x_continuous(limits = c(min(sfs)-.5, max(sfs)+.5))+
gghisto+xlab("Mean Allele Frequencies \nAcrossTrials")

tvar <- function(n,S) {
a1 <- sum(1/(seq(1,n-1,1)));ab <- sum(1/((seq(1,n-1,1))**2))
b <- (n+1)/(3*(n-1));bb <- (2*((n**2)+n+3))/((9*n)*(n-1))
c1 <- b-(1/a1);c2 <- bb-((n+2)/(a1*n))+(ab/(a1**2))
c3 <- c2/((a1**2)+ab);var <- ((c1/a1)*S)+(c3*S*(S-1))
return(var)
}
L = 100000;N = 1e3;n = 20;mu = 1.2e-8
w.theta <- c();pix <- c();td <- c();sfs <- as.data.frame(matrix())
for (i in 1:100){
tree <- generateTree(n, N)
tmrca <- getTotalTreeHeight(tree)
seqMatrix <- throwDownMutations(tree$branches, n, mu, L)
freq <- mean(colSums(seqMatrix))
sfs[i,] <- freq
#wattersons
denom <- rep(0,nrow(seqMatrix))
for (j in 1:(n-1)){
denom[j] <- (1/j)
realdenom <- sum(denom)}
w.theta <- c(w.theta,ncol(seqMatrix)/(realdenom))
#pi
for (j in 1:n){
jsum <- rowSums(seqMatrix)[j]
p <- jsum/n
x <- (2*p)*(n-p)*(n/(n-1))+pi
pix <- c(pix,x)
}
#tajimas
S <- ncol(seqMatrix)
td <- c(td,(pix[i]-w.theta[i])/sqrt(tvar(nrow(seqMatrix),ncol(seqMatrix))))
}
sfs.stat<- as.data.frame(c(w.theta, pix, td))
stat.names <- c(rep("Wattersons theta",length(w.theta)), rep("p .diversity",length(pix)),rep("Tajimas D",length(td)))
stat.df <- as.data.frame(cbind(sfs.stat,stat.names))
cat("--MEANS--\nWatterson's Theta:",mean(w.theta),"\nPi:",mean(pix),"\nTajima's D:",mean(td))
## --MEANS--
## Watterson's Theta: 4.69031
## Pi: 12.41577
## Tajima's D: 6.017637
ggplot(sfs, aes(x=V1)) +
geom_histogram(aes(y=..density..), binwidth=.4, color="gray18",fill="white")+ geom_density(alpha=.2, fill="red",size=.5, color="gray14") + ggtitle("Site Frequency Spectrum, n=200")+
scale_x_continuous(limits = c(min(sfs)-.5, max(sfs)+.5))+
gghisto+xlab("Mean Allele Frequencies \nAcrossTrials")

ggplot(stat.df, aes(x = stat.df$`c(w.theta, pix, td)`, fill = stat.names)) +geom_density(alpha = .3)+ ggtitle("Diversity of \nSimulated Sequences, n=200")+
scale_x_continuous(limits = c(min(pix), 20))+
gghisto+xlab("Value of Statistic \nAcrossTrials")
