Bneck Stuff

Packages

require(ggplot2)

## Loading required package: ggplot2

Params

N: wild Ne
t_recover: generations since recovery
\( \mu \): mutation rate
L: length of region considered in bp
\( \theta \): Population mutation rate
\( \rho \): Population recombination rate. For simplicity, assume \( \rho=\theta \).
bneck_N: \( N_e \) at beginning of bottleneck
bneck_dur: generations of bneck
\( \alpha \): Growth parameter

Function to calculate Ne during exponential growth

bneck <- function(size, duration, recovery = 0) {
    alpha = -1/(duration/4 * N) * log(size/N)
    t <- 1:duration
    n = c(N * exp(-alpha * (t/4 * N)), rep(N, recovery))
    ne = (duration + recovery)/(sum(1/n))
    return(c(ne, ne/duration))
}

Constant Size Models

Detail Ne model 1: constant

mu = 3e-08
N = 1e+05
d_time = 6000
region = 1e+05
theta = 4 * N * mu * region
bneck_N = 23200
bneck_dur = 5000
alpha = -(1/((bneck_dur)/(4 * N))) * log(bneck_N/N)
t_recover = d_time - bneck_dur

ne1 = as.numeric()
time1 = as.numeric()
# phase1 postbneck
time1 = 1:100 * t_recover/100
ne1 = rep(N, 100)
# phase2 bneck
b1 = bneck_N
k1 = round(bneck_N/bneck_dur, 2)
time1 = c(time1, 1:100 * bneck_dur/100 + t_recover)
ne1 = c(ne1, rep(bneck_N, 100))
# phase3 prebneck
time1 = c(time1, 1:10 * 50 + t_recover + bneck_dur)
ne1 = c(ne1, rep(N, 10))

Detail Ne model 2: constant

mu = 3e-08
N = 1e+05
d_time = 6000
region = 1e+05
theta = 4 * N * mu * region
bneck_N = 4640
bneck_dur = 1000
alpha = -(1/((bneck_dur)/(4 * N))) * log(bneck_N/N)
t_recover = d_time - bneck_dur

ne2 = as.numeric()
time2 = as.numeric()
# phase1 postbneck
time2 = 1:100 * t_recover/100
ne2 = rep(N, 100)
# phase2 bneck
b2 = bneck_N
k2 = round(bneck_N/bneck_dur, 2)
time2 = c(time2, 1:100 * bneck_dur/100 + t_recover)
ne2 = c(ne2, rep(bneck_N, 100))
# phase3 prebneck
time2 = c(time2, 1:10 * 50 + t_recover + bneck_dur)
ne2 = c(ne2, rep(N, 10))

Exponential Growth Models

Detail for model 3: exponential

mu = 3e-08
N = 1e+05
d_time = 6000
region = 1e+05
theta = 4 * N * mu * region
bneck_N = 8700
bneck_dur = 5000
alpha = -(1/((bneck_dur)/(4 * N))) * log(bneck_N/N)
t_recover = d_time - bneck_dur

ne3 = as.numeric()
time3 = as.numeric()
# phase1 postbneck
time3 = 1:100 * t_recover/100
ne3 = rep(N, 100)
# phase2 bneck
b3 = bneck(bneck_N, bneck_dur)[1]
k3 = round(bneck(bneck_N, bneck_dur)[2], 2)
time3 = c(time3, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$x * 4 * N + t_recover)
ne3 = c(ne3, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$y)

plot of chunk unnamed-chunk-5

# phase3 prebneck
time3 = c(time3, 1:10 * 50 + t_recover + bneck_dur)
ne3 = c(ne3, rep(N, 10))

Detail for model 4: exponential

# params
mu = 3e-08
N = 1e+05
d_time = 6000
region = 1e+05
theta = 4 * N * mu * region
bneck_N = 1000
bneck_dur = 1000
alpha = -(1/((bneck_dur)/(4 * N))) * log(bneck_N/N)
t_recover = d_time - bneck_dur

ne4 = as.numeric()
time4 = as.numeric()
# phase1 postbneck
time4 = 1:100 * t_recover/100
ne4 = rep(N, 100)
# phase2 bneck
b4 = bneck(bneck_N, bneck_dur)[1]
k4 = round(bneck(bneck_N, bneck_dur)[2], 2)
time4 = c(time4, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$x * 4 * N + t_recover)
ne4 = c(ne4, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$y)

plot of chunk unnamed-chunk-6

# phase3 prebneck
time4 = c(time4, 1:10 * 50 + t_recover + bneck_dur)
ne4 = c(ne4, rep(N, 10))

Detail for model 5: long exponential

# params
mu = 3e-08
N = 1e+05
d_time = 6000
region = 1e+05
theta = 4 * N * mu * region
bneck_N = 1000
bneck_dur = 5000
alpha = -(1/((bneck_dur)/(4 * N))) * log(bneck_N/N)
t_recover = d_time - bneck_dur

ne4 = as.numeric()
time4 = as.numeric()
# phase1 postbneck
time4 = 1:100 * t_recover/100
ne4 = rep(N, 100)
# phase2 bneck
b4 = bneck(bneck_N, bneck_dur)[1]
k4 = round(bneck(bneck_N, bneck_dur)[2], 2)
time4 = c(time4, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$x * 4 * N + t_recover)
ne4 = c(ne4, curve(N * exp(-alpha * x), from = 1/(4 * N), to = bneck_dur/(4 * 
    N))$y)

plot of chunk unnamed-chunk-7

# phase3 prebneck
time4 = c(time4, 1:10 * 50 + t_recover + bneck_dur)
ne4 = c(ne4, rep(N, 10))

Comparisons

See stats results in github repo.

Stats calculated are: *rep *pop
*S *n1
*next
*theta *pi
*thetaH
*Hprime
*tajd
*fulif
*fulid *fulifs
*fulids *r *rmmg *nhaps *hdiv
*wallb *wallq *rosasrf *rosasru
*zns
*unique
*shared
*fixed *Fst

Get data (note: put these as gists or figshare)

# get data
system("scp farm:~/projects/bneck/results/* ~/Desktop/")
big_constant <- read.table("~/Desktop/71026stats.txt", header = T)
sm_constant <- read.table("~/Desktop/71166stats.txt", header = T)
big_exp <- read.table("~/Desktop/71407stats.txt", header = T)
sm_exp <- read.table("~/Desktop/71507stats.txt", header = T)

# split teo maize
cbteo <- subset(big_constant, big_constant$pop == 1)
cbmz <- subset(big_constant, big_constant$pop == 0)
csteo <- subset(sm_constant, sm_constant$pop == 1)
csmz <- subset(sm_constant, sm_constant$pop == 0)
ebteo <- subset(big_exp, big_exp$pop == 1)
ebmz <- subset(big_exp, big_exp$pop == 0)
esteo <- subset(sm_exp, sm_exp$pop == 1)
esmz <- subset(sm_exp, sm_exp$pop == 0)

# add descriptors
csteo$size = "small"
csteo$model = "constant"
csteo$taxa = "teo"
cbteo$size = "big"
cbteo$model = "constant"
cbteo$taxa = "teo"
esteo$size = "small"
esteo$model = "exp"
esteo$taxa = "teo"
ebteo$size = "big"
ebteo$model = "exp"
ebteo$taxa = "teo"
csmz$size = "small"
csmz$model = "constant"
csmz$taxa = "mz"
cbmz$size = "big"
cbmz$model = "constant"
cbmz$taxa = "mz"
esmz$size = "small"
esmz$model = "exp"
esmz$taxa = "mz"
ebmz$size = "big"
ebmz$model = "exp"
ebmz$taxa = "mz"

1 vs 2: Big constant vs. sm_constant, same k

# plot
plot(ne1 ~ time1, ylim = c(1, N), xlab = "time in past", ylab = "N", type = "l", 
    col = "red", lwd = 2)
points(ne2 ~ time2, type = "l", col = "blue", lwd = 2)
abline(h = b1, col = "red", lty = 2, lwd = 2)
abline(h = b2, col = "blue", lty = 2, lwd = 2)
legend("center", box.lwd = 0, lty = 2, lwd = 2, col = c("red", "blue"), legend = c(paste("k=", 
    k1), paste("k=", k2)))

plot of chunk unnamed-chunk-9

Diversity in domesticate pretty close

ggplot(rbind(csmz, cbmz), aes(x = pi, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "red", small = "blue"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-10

TajD higher in big

ggplot(rbind(csmz, cbmz), aes(x = tajd, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "red", small = "blue"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-11

Num Unique SNPs higher in both dom and wild

ggplot(rbind(csmz, cbmz), aes(x = unique, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "red", small = "blue"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-12

ggplot(rbind(csteo, cbteo), aes(x = unique, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "red", small = "blue"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-12

1 vs 3 Constant vs. exponential, larger Ne same k

# plot
plot(ne1 ~ time1, ylim = c(1, N), xlab = "time in past", ylab = "N", type = "l", 
    col = "red", lwd = 2)
points(ne3 ~ time3, type = "l", col = "green", lwd = 2)
abline(h = b1, col = "red", lty = 2, lwd = 2)
abline(h = b3, col = "green", lty = 2, lwd = 2)
legend("center", box.lwd = 0, lty = 2, lwd = 2, col = c("red", "green"), legend = c(paste("k=", 
    k1), paste("k=", k3)))

plot of chunk unnamed-chunk-13

Diversity same == same mean Ne.

ggplot(rbind(ebmz, cbmz), aes(x = pi, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "red", 
    exp = "green"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-14

Tajd lower in exponential. More growth (more growth due to smaller initial size)?

ggplot(rbind(ebmz, cbmz), aes(x = tajd, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "red", 
    exp = "green"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-15

Num Unique SNPs higher in exponential dom and wild. Bigger effect on dom. Stronger bneck to get same Ne?

ggplot(rbind(ebmz, cbmz), aes(x = unique, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "red", 
    exp = "green"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-16

ggplot(rbind(ebteo, cbteo), aes(x = unique, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "red", 
    exp = "green"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-16

2 vs 4 constant vs. exponential, smaller Ne same k

# plot
plot(ne2 ~ time2, ylim = c(1, N), xlab = "time in past", ylab = "N", type = "l", 
    col = "blue", lwd = 2)
points(ne4 ~ time4, type = "l", col = "grey", lwd = 2)
abline(h = b2, col = "blue", lty = 2, lwd = 2)
abline(h = b4, col = "grey", lty = 2, lwd = 2)
legend("center", box.lwd = 0, lty = 2, lwd = 2, col = c("blue", "grey"), legend = c(paste("k=", 
    k2), paste("k=", k4)))

plot of chunk unnamed-chunk-17

Diversity same b/c same mean Ne.

ggplot(rbind(esmz, csmz), aes(x = pi, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "blue", 
    exp = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-18

Tajd \( \pm \) same.

ggplot(rbind(esmz, csmz), aes(x = tajd, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "blue", 
    exp = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-19

Num Unique SNPs same in both maize and teo. Shorter time for growth? Smaller overall difference?

ggplot(rbind(esmz, csmz), aes(x = unique, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "blue", 
    exp = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-20

ggplot(rbind(esteo, csteo), aes(x = unique, fill = model)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(constant = "blue", 
    exp = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-20

3 vs 4 Big exponential vs. small exponential, same Ne.

# plot
plot(ne3 ~ time3, ylim = c(0, N), xlab = "time in past", ylab = "N", type = "l", 
    col = "green", lwd = 2)
points(ne4 ~ time4, type = "l", col = "grey", lwd = 2)
abline(h = b3, col = "green", lty = 2, lwd = 2)
abline(h = b4, col = "grey", lty = 2, lwd = 2)
legend("left", box.lwd = 0, lty = 2, lwd = 2, col = c("green", "grey"), legend = c(paste("k=", 
    k3), paste("k=", k4)))

plot of chunk unnamed-chunk-21

Diversity same == same mean Ne.

ggplot(rbind(ebmz, esmz), aes(x = pi, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "green", small = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-22

Tajd lower in exponential. More growth (more growth due to smaller initial size)?

ggplot(rbind(ebmz, esmz), aes(x = tajd, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "green", small = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-23

Num Unique SNPs higher in exponential dom and wild. Bigger effect on dom. Stronger bneck to get same Ne?

ggplot(rbind(ebmz, esmz), aes(x = unique, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "green", small = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-24

ggplot(rbind(ebteo, esteo), aes(x = unique, fill = size)) + geom_histogram(alpha = 0.5, 
    position = "identity") + scale_fill_manual(values = c(big = "green", small = "grey"))

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-24