dat <- data.frame(AA = c(634), Aa = c(391), aa = c(85), row.names = c("observed genotype counts"))
Warning messages:
1: In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
2: In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
3: In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
4: In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
dat
proportions (e.g. 1 = p^2+q^2+2pq)
allele_freq <- cbind(((2*dat$AA) + dat$Aa)/(2*rowSums(dat)),
((2*dat$aa) + dat$Aa)/(2*rowSums(dat)))
colnames(allele_freq) <- c("A", "a")
allele_freq
A a
observed genotype counts 0.7472973 0.2527027
HWEexpected <- t(apply(allele_freq, 1, function(x){
AA <- x["A"]^2
Aa <- 2*x["A"]*x["a"]
aa <- x["a"]^2
return(cbind(AA, Aa, aa))
}))
HWEexpected
[,1] [,2] [,3]
observed genotype counts 0.5584533 0.3776881 0.06385866
# convert genotypes and proportions into lists for efficient tests
dat <- lapply(1:nrow(dat), function(i) return(as.vector(dat[i,])))
HWEexpected <- lapply(1:nrow(HWEexpected), function(i){
return(as.vector(HWEexpected[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat, p = HWEexpected, SIMPLIFY = FALSE)
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 5.0344, df = 2, p-value = 0.08068
The p-value is greater than 0.05 so we fail to reject the null hypothesis that the population is in Hardy-Weinberg Equilibrium
dat2 <- data.frame(pp = c(9), pq = c(135), pr = c(2), qq = c(75), qr = c(39), rr = c(25), row.names =
c("observed genotype counts"))
dat2
proportions (e.g. 1 = p^2+q^2+2pq)
allele_freq <- cbind(((2*dat2$pp) + dat2$pq + dat2$pr)/(2*rowSums(dat2)),
((2*dat2$qq) + dat2$pq + dat2$qr)/(2*rowSums(dat2)),
((2*dat2$rr) + dat2$pr + dat2$qr)/(2*rowSums(dat2)))
colnames(allele_freq) <- c("p", "q", "r")
row.names(allele_freq) <- c("gene frequency")
allele_freq
p q r
gene frequency 0.2719298 0.5684211 0.1596491
HWEexpected <- t(apply(allele_freq, 1, function(x){
pp <- x["p"]^2
pq <- 2*x["p"]*x["q"]
pr <- 2*x["p"]*x["r"]
qq <- x["q"]^2
qr <- 2*x["q"]*x["r"]
rr <- x["r"]^2
return(cbind(pp, pq, pr, qq, qr, rr))
}))
colnames(HWEexpected) <- c("pp", "pq", "pr", "qq", "qr", "rr")
HWEexpected
pp pq pr qq qr rr
gene frequency 0.07394583 0.3091413 0.08682672 0.3231025 0.1814958 0.02548784
# convert genotypes and proportions into lists for efficient tests
dat2 <- lapply(1:nrow(dat2), function(i) return(as.vector(dat2[i,])))
HWEexpected <- lapply(1:nrow(HWEexpected), function(i){
return(as.vector(HWEexpected[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat2, p = HWEexpected, SIMPLIFY = FALSE)
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 102.39, df = 5, p-value < 2.2e-16
p-value is less than 0.05 so we reject the null of HWE.
dat3 <- data.frame(M = c(475, 195, 896, 14), MN = c(89, 215, 1559, 48), N = c(5, 79, 645, 138), row.names = c("Eskimos", "Russians", "Belgians", "Papuans"))
dat3
proportions (e.g. 1 = p^2+q^2+2pq)
allele_freq_esk <- cbind(((2*dat3[1,1]) + dat3[1,2])/(2*sum(dat3[1,])),
((2*dat3[1,3]) + dat3[1,2])/(2*sum(dat3[1,])))
colnames(allele_freq_esk) <- c("M", "N")
allele_freq_esk
M N
[1,] 0.9130053 0.08699473
allele_freq_rus <- cbind(((2*dat3[2,1]) + dat3[2,2])/(2*sum(dat3[2,])),
((2*dat3[2,3]) + dat3[2,2])/(2*sum(dat3[2,])))
colnames(allele_freq_rus) <- c("M", "N")
allele_freq_rus
M N
[1,] 0.6186094 0.3813906
allele_freq_bel <- cbind(((2*dat3[3,1]) + dat3[3,2])/(2*sum(dat3[3,])),
((2*dat3[3,3]) + dat3[3,2])/(2*sum(dat3[3,])))
colnames(allele_freq_bel) <- c("M", "N")
allele_freq_bel
M N
[1,] 0.5404839 0.4595161
allele_freq_pap <- cbind(((2*dat3[4,1]) + dat3[4,2])/(2*sum(dat3[4,])),
((2*dat3[4,3]) + dat3[4,2])/(2*sum(dat3[4,])))
colnames(allele_freq_pap) <- c("M", "N")
allele_freq_pap
M N
[1,] 0.19 0.81
HWEexpected_esk <- t(apply(allele_freq_esk, 1, function(x){
M <- x["M"]^2
MN <- 2*x["M"]*x["N"]
N <- x["N"]^2
return(cbind(M,MN,N))
}))
colnames(HWEexpected_esk) <- c("M", "MN", "N")
HWEexpected_esk
M MN N
[1,] 0.8335786 0.1588533 0.007568083
HWEexpected_rus <- t(apply(allele_freq_rus, 1, function(x){
M <- x["M"]^2
MN <- 2*x["M"]*x["N"]
N <- x["N"]^2
return(cbind(M,MN,N))
}))
colnames(HWEexpected_rus) <- c("M", "MN", "N")
HWEexpected_rus
M MN N
[1,] 0.3826776 0.4718636 0.1454588
HWEexpected_bel <- t(apply(allele_freq_bel, 1, function(x){
M <- x["M"]^2
MN <- 2*x["M"]*x["N"]
N <- x["N"]^2
return(cbind(M,MN,N))
}))
colnames(HWEexpected_bel) <- c("M", "MN", "N")
HWEexpected_bel
M MN N
[1,] 0.2921228 0.4967221 0.2111551
HWEexpected_pap <- t(apply(allele_freq_pap, 1, function(x){
M <- x["M"]^2
MN <- 2*x["M"]*x["N"]
N <- x["N"]^2
return(cbind(M,MN,N))
}))
colnames(HWEexpected_pap) <- c("M", "MN", "N")
HWEexpected_pap
M MN N
[1,] 0.0361 0.3078 0.6561
# convert genotypes and proportions into lists for efficient tests
dat3_esk <- lapply(1:nrow(dat3[1,]), function(i){return(as.vector(dat3[i,]))})
HWEexpected_esk <- lapply(1:nrow(HWEexpected_esk), function(i){return(as.vector(HWEexpected_esk[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat3_esk, p = HWEexpected_esk, SIMPLIFY = FALSE)
Chi-squared approximation may be incorrect
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 0.13408, df = 2, p-value = 0.9352
Chi-squared doesn’t like low values #### . Check Eskimo chi-squared against r-package for Hardy-Weinberg
library(HardyWeinberg)
Loading required package: mice
Loading required package: lattice
Loading required package: Rsolnp
eskobs <- c(AA = 475, AB = 89, BB = 5)
HWChisq(eskobs)
Expected counts below 5: chi-square approximation may be incorrect
Chi-square test with continuity correction for Hardy-Weinberg equilibrium (autosomal)
Chi2 = 0.01751215 DF = 1 p-value = 0.8947205 D = -0.693761 f = 0.01535081
This test isn’t happy either… To the best that I can tell, Eskimos are in HWE for blood groups ### . Russians
# convert genotypes and proportions into lists for efficient tests
dat3_rus <- lapply(1:nrow(dat3[2,]), function(i){return(as.vector(dat3[i,]))})
HWEexpected_rus <- lapply(1:nrow(HWEexpected_rus), function(i){return(as.vector(HWEexpected_rus[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat3_rus, p = HWEexpected_rus, SIMPLIFY = FALSE)
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 497, df = 2, p-value < 2.2e-16
Russians are not in HWE
# convert genotypes and proportions into lists for efficient tests
dat3_bel <- lapply(1:nrow(dat3[3,]), function(i){return(as.vector(dat3[i,]))})
HWEexpected_bel <- lapply(1:nrow(HWEexpected_bel), function(i){return(as.vector(HWEexpected_bel[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat3_bel, p = HWEexpected_bel, SIMPLIFY = FALSE)
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 816.64, df = 2, p-value < 2.2e-16
Belgians are not in HWE ### . Papuans
# convert genotypes and proportions into lists for efficient tests
dat3_pap <- lapply(1:nrow(dat3[4,]), function(i){return(as.vector(dat3[i,]))})
HWEexpected_pap <- lapply(1:nrow(HWEexpected_pap), function(i){return(as.vector(HWEexpected_pap[i,]))
})
# Run HWE test
mapply(FUN = chisq.test, x = dat3_pap, p = HWEexpected_pap, SIMPLIFY = FALSE)
[[1]]
Chi-squared test for given probabilities
data: dots[[1L]][[1L]]
X-squared = 10460, df = 2, p-value < 2.2e-16
Papuans are not in HWE
dat <- data.frame(AA = c(100), Aa = c(0), aa = c(100), row.names = c("observed genotype counts"))
dat
proportions (e.g. 1 = p^2+q^2+2pq)
allele_freq_dro <- cbind(((2*dat$AA) + dat$Aa)/(2*rowSums(dat)),
((2*dat$aa) + dat$Aa)/(2*rowSums(dat)))
colnames(allele_freq_dro) <- c("A", "a")
allele_freq_dro
A a
observed genotype counts 0.5 0.5
HWEexpected_dro <- t(apply(allele_freq_dro, 1, function(x){
AA <- x["A"]^2
Aa <- 2*x["A"]*x["a"]
aa <- x["a"]^2
return(cbind(AA, Aa, aa))
}))
colnames(HWEexpected_dro) <- c("AA", "Aa", "aa")
HWEexpected_dro
AA Aa aa
observed genotype counts 0.25 0.5 0.25
AA x aa cross
f1 <- 4 * allele_freq[,1] * allele_freq[,2]
f1
[1] 0.6182825
proportions (e.g. 1 = p^2+q^2+2pq)
p = 0.08
q = 1-p
allele_freq <- data.frame("pp" = c(p^2), "2pq" = c(2*p*q), "qq" = c(q^2), row.names = c("genotype frequency"))
allele_freq
4 alleles at a locus
#A1 = p; A2 = q; A3 = r; A4 = s
allele_freq <- data.frame(p = c(.5), q = c(.3), r = c(.15), s = c(0.05), row.names = c("allele frequencies"))
allele_freq
HWEexpected <- t(apply(allele_freq, 1, function(x){
pp <- x["p"]^2
pq <- 2*x["p"]*x["q"]
pr <- 2*x["p"]*x["r"]
ps <- 2*x["p"]*x["s"]
qq <- x["q"]^2
qr <- 2*x["q"]*x["r"]
qs <- 2*x["q"]*x["s"]
rr <- x["r"]^2
rs <- 2*x["r"]*x["s"]
ss <- x["s"]^2
return(cbind(pp, pq, pr, ps, qq, qr, qs, rr, rs, ss))
}))
colnames(HWEexpected) <- c("pp", "pq", "pr", "ps", "qq", "qr","qs", "rr", "rs", "ss")
HWEexpected
pp pq pr ps qq qr qs rr rs ss
allele frequencies 0.25 0.3 0.15 0.05 0.09 0.09 0.03 0.0225 0.015 0.0025
hetper <- rowSums(HWEexpected) -
(HWEexpected[,1] +
HWEexpected[,5] +
HWEexpected[,8] +
HWEexpected[,10])
hetper
allele frequencies
0.635
pop = 100
ss = HWEexpected[,10]
hets <- 2*(HWEexpected[,9]+HWEexpected[,7]+HWEexpected[,4])
print(pophets <- pop*hets)
[1] 19
print(popss <- pop*ss)
[1] 0.25
Using the drosophila population frequencies from question 5…
allele_freq_dro <- cbind(((2*dat$AA) + dat$Aa)/(2*rowSums(dat)),
((2*dat$aa) + dat$Aa)/(2*rowSums(dat)))
colnames(allele_freq_dro) <- c("A", "a")
allele_freq_dro
A a
observed genotype counts 0.5 0.5
f = 0.25
HWEexpected_inb <- t(apply(allele_freq_dro, 1, function(x){
AA <- x["A"]^2 + f*x["A"]*x["a"]
Aa <- 2*x["A"]*x["a"]*(1-f)
aa <- x["a"]^2 + f*x["A"]*x["a"]
return(cbind(AA, Aa, aa))
}))
colnames(HWEexpected_inb) <- c("AA", "Aa", "aa")
HWEexpected_inb
AA Aa aa
observed genotype counts 0.3125 0.375 0.3125
falg = (HWEexpected_dro[,2]-HWEexpected_inb[,2])/HWEexpected_dro[,2]
falg
[1] 0.25
Sys.setenv(RSTUDIO_PANDOC="--- insert directory here ---")