Deletions and duplications are drawn from identical allele-frequency distributions. Deletions are more common in high-recombination (rec.) regions, whereas duplications are more common in low-recombination regions.
# Every class is drawn from the same Uniform(0, 1) distribution; only the
# class sizes differ between the low and high rec. groups.

# Deletions: 100 in low rec., 1000 in high rec.
del_low <- runif(100)
del_high <- runif(1000)
deletion <- c(del_low, del_high)

# Duplications: 1000 in low rec., 100 in high rec.
dup_low <- runif(1000)
dup_high <- runif(100)
duplication <- c(dup_low, dup_high)

# Labels are laid out in the same order as the values in freq:
# deletions (low, high) followed by duplications (low, high).
rec <- rep(c("low", "high", "low", "high"), times = c(100, 1000, 1000, 100))
type <- rep(c("deletion", "duplication"), each = 1100)
freq <- c(deletion, duplication)
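Both types have the same frequency distribution, so a t-test finds no difference in mean frequency between deletions and duplications.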
# Welch two-sample t-test of mean frequency between the two types.
t.test(freq ~ type)
##
## Welch Two Sample t-test
##
## data: freq by type
## t = -0.047266, df = 2197, p-value = 0.9623
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02515389 0.02396989
## sample estimates:
## mean in group deletion mean in group duplication
## 0.5003757 0.5009677
# Side-by-side boxplots of frequency for each type.
boxplot(freq ~ type)
High- and low-recombination regions also have the same frequencies.
# Welch two-sample t-test of mean frequency between high and low rec.
t.test(freq ~ rec)
##
## Welch Two Sample t-test
##
## data: freq by rec
## t = -0.062925, df = 2197.5, p-value = 0.9498
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02535001 0.02377375
## sample estimates:
## mean in group high mean in group low
## 0.5002776 0.5010657
# Side-by-side boxplots of frequency for each rec. class.
boxplot(freq ~ rec)
A linear model of frequency shows no significant interaction between type and recombination class.
# Gather the simulated frequencies and their labels into one data frame.
bob <- data.frame(freq = freq, type = type, rec = rec)

# Additive model: main effects of type and rec only, no interaction.
model1 <- lm(bob$freq ~ bob$type + bob$rec)
summary(model1)
##
## Call:
## lm(formula = bob$freq ~ bob$type + bob$rec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50093 -0.24979 -0.00998 0.26461 0.49858
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5002921 0.0090772 55.115 <2e-16 ***
## bob$typeduplication -0.0001598 0.0217889 -0.007 0.994
## bob$reclow 0.0009189 0.0217889 0.042 0.966
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2938 on 2197 degrees of freedom
## Multiple R-squared: 1.826e-06, Adjusted R-squared: -0.0009085
## F-statistic: 0.002006 on 2 and 2197 DF, p-value: 0.998
# Same main effects as the additive model, plus the rec-by-type interaction.
model2 <- lm(bob$freq ~ bob$type + bob$rec + bob$rec:bob$type)
summary(model2)
##
## Call:
## lm(formula = bob$freq ~ bob$type + bob$rec + bob$rec:bob$type)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52587 -0.25019 -0.01165 0.26455 0.50154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.497336 0.009288 53.545 <2e-16 ***
## bob$typeduplication 0.032353 0.030806 1.050 0.294
## bob$reclow 0.033431 0.030806 1.085 0.278
## bob$typeduplication:bob$reclow -0.065025 0.043566 -1.493 0.136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2937 on 2196 degrees of freedom
## Multiple R-squared: 0.001015, Adjusted R-squared: -0.0003495
## F-statistic: 0.7439 on 3 and 2196 DF, p-value: 0.5258
# Compare the two fits (interaction model listed first) to test whether
# the interaction term improves on the additive model.
anova(model2, model1)