library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Simulate an unfolded SFS of for 26 samples
sfs<-rep(1:25/26,times=c(rmultinom(n=1,size=10000,prob=c(1/1:25))))
theta=10000/(sum(1/1:25))
number<-as.numeric(table(sfs))
counts<-1:25
unfolded<-data.frame(counts,number,theta/counts)
ggplot(unfolded,aes(y=number,x=counts,color="black")) +
geom_point() +
geom_point(aes(y=theta.counts,x=counts,color="red"))+
scale_color_manual(values=c("green","black"),labels=c("neutral","simulated"))+
labs(y="number of SNPs",x="count in sample",color="Data") +
theme_minimal()
Fold it
folded<-mutate(unfolded,fcounts=ifelse(counts<26-counts,counts,26-counts)) %>% group_by(fcounts) %>% summarize(simtotal=sum(number),exptotal=sum(theta.counts))
ggplot(folded,aes(y=simtotal,x=fcounts)) +
geom_point() +
geom_point(aes(y=exptotal,x=fcounts,color="red"))+
scale_color_manual(values=c("green","black"),labels=c("neutral","simulated"))+
labs(y="number of SNPs",x="count in sample",color="Data") +
theme_minimal()