R Notebook

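Deletions and duplications are drawn from identical allele frequency distributions (uniform on [0, 1]). Deletions are more common in high-recombination ("high rec.") regions, whereas duplications are more common in low-recombination ("low rec.") regions.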

# Simulate allele frequencies for two variant types (deletion, duplication)
# across two rec classes (low, high). Every frequency is an independent draw
# from Uniform(0, 1), so neither type nor rec has a true effect on frequency;
# only the number of variants in each type/rec cell differs.

# Fix the RNG seed so the simulated data and all downstream results are
# reproducible from run to run.
set.seed(1)

n_rare <- 100      # variants in the under-represented rec class of each type
n_common <- 1000   # variants in the over-represented rec class of each type
n_per_type <- n_rare + n_common

# deletions common in high rec.
del_low <- runif(n_rare)
del_high <- runif(n_common)
deletion <- c(del_low, del_high)
rec <- rep(c("low", "high"), times = c(n_rare, n_common))
type <- rep("deletion", n_per_type)

# duplications common in low rec
dup_low <- runif(n_common)
dup_high <- runif(n_rare)
duplication <- c(dup_low, dup_high)
rec <- c(rec, rep(c("low", "high"), times = c(n_common, n_rare)))
type <- c(type, rep("duplication", n_per_type))

# Stack both types; element order matches `type` and `rec`.
freq <- c(deletion, duplication)

Both types have the same mean frequency; the t-test finds no significant difference between deletions and duplications.

# Welch two-sample t-test: does mean frequency differ between variant types?
t.test(freq ~ type)

## 
##  Welch Two Sample t-test
## 
## data:  freq by type
## t = -0.047266, df = 2197, p-value = 0.9623
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02515389  0.02396989
## sample estimates:
##    mean in group deletion mean in group duplication 
##                 0.5003757                 0.5009677

# Side-by-side boxplots of frequency, one box per variant type.
boxplot(freq ~ type)

High- and low-recombination regions have the same mean frequency; the t-test finds no significant difference.

# Welch two-sample t-test: does mean frequency differ between rec classes?
t.test(freq ~ rec)

## 
##  Welch Two Sample t-test
## 
## data:  freq by rec
## t = -0.062925, df = 2197.5, p-value = 0.9498
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02535001  0.02377375
## sample estimates:
## mean in group high  mean in group low 
##          0.5002776          0.5010657

# Side-by-side boxplots of frequency, one box per rec class.
boxplot(freq ~ rec)

A linear model of frequency shows no significant main effects of type or recombination class, and no significant interaction between them.

# Gather the simulated vectors into one data frame, then fit the additive
# linear model: frequency explained by main effects of type and rec only.
# NOTE(review): `lm(freq ~ type + rec, data = bob)` would be more idiomatic,
# but it changes the printed call and coefficient labels, and both models
# passed to anova() must then use the same response expression.
bob <- data.frame(freq = freq, type = type, rec = rec)
model1 <- lm(bob$freq ~ bob$type + bob$rec)
summary(model1)

## 
## Call:
## lm(formula = bob$freq ~ bob$type + bob$rec)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50093 -0.24979 -0.00998  0.26461  0.49858 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.5002921  0.0090772  55.115   <2e-16 ***
## bob$typeduplication -0.0001598  0.0217889  -0.007    0.994    
## bob$reclow           0.0009189  0.0217889   0.042    0.966    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2938 on 2197 degrees of freedom
## Multiple R-squared:  1.826e-06,  Adjusted R-squared:  -0.0009085 
## F-statistic: 0.002006 on 2 and 2197 DF,  p-value: 0.998

# Same predictors as the additive model, plus the rec-by-type interaction.
model2 <- lm(bob$freq ~ bob$type + bob$rec + bob$rec:bob$type)
summary(model2)

## 
## Call:
## lm(formula = bob$freq ~ bob$type + bob$rec + bob$rec:bob$type)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52587 -0.25019 -0.01165  0.26455  0.50154 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.497336   0.009288  53.545   <2e-16 ***
## bob$typeduplication             0.032353   0.030806   1.050    0.294    
## bob$reclow                      0.033431   0.030806   1.085    0.278    
## bob$typeduplication:bob$reclow -0.065025   0.043566  -1.493    0.136    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2937 on 2196 degrees of freedom
## Multiple R-squared:  0.001015,   Adjusted R-squared:  -0.0003495 
## F-statistic: 0.7439 on 3 and 2196 DF,  p-value: 0.5258

# Compare the interaction model against the additive model.
anova(model2, model1)