library(MASS)
setwd("/home/mazin/teaching/R/2014")
d = as.matrix(read.table("hw.data/5.hs.cnts.txt"))
meta = read.table("hw.data/5.hs.meta.txt", stringsAsFactors = F)
rin = meta$rin
sex = meta$sex
age = meta$age
age = log(age)
d = d[apply(d < 20, 1, sum) < 20, ]
dim(d)
## [1] 10432 40
pvs.age = matrix(ncol = 6, nrow = 100)
pvs.age[] = NA
colnames(pvs.age) = c("lm", "glm", "glm.nb", "lm.sh", "glm.sh", "glm.nb.sh")
totals = apply(d, 2, sum)
dn = round(sweep(d, 2, totals/mean(totals), "/"))
apply(dn, 2, sum)
## c1 c2 c3 c4 c5 c6 c7 c8 c9
## 3656830 3657009 3656963 3656949 3656479 3656920 3657056 3657061 3656987
## c10 c11 c12 c13 c14 c15 c16 c17 c18
## 3656992 3657083 3657186 3655982 3657025 3657091 3657041 3657026 3656999
## c19 c20 c21 c22 c23 c24 c25 c26 c27
## 3657029 3656931 3656961 3657050 3657026 3657090 3657085 3657001 3656951
## c28 c29 c30 c31 c32 c33 c34 c35 c36
## 3657135 3657077 3657629 3657108 3656950 3656898 3656982 3657120 3656772
## c37 c38 c39 c40
## 3657015 3656881 3657044 3657121
options(warn = -1)
for (i in 1:100) {
# cat('\r',i)
x = d[i, ]
model = x ~ rin + sex + age + sex:age
try({
pvs.age[i, 1:3] = c(anova(lm(model))[3, 5], max(anova(glm(model, family = "quasipoisson"),
test = "Chisq")[4, 5], anova(glm(model, family = "poisson"), test = "Chisq")[4,
5]), anova(glm.nb(model), test = "Chisq")[4, 5])
x = sample(x)
pvs.age[i, 4:6] = c(anova(lm(model))[3, 5], max(anova(glm(model, family = "quasipoisson"),
test = "Chisq")[4, 5], anova(glm(model, family = "poisson"), test = "Chisq")[4,
5]), anova(glm.nb(model), test = "Chisq")[4, 5])
}, silent = T)
}
pv.bh = cbind(p.adjust(pvs.age[, 1], m = "BH"), p.adjust(pvs.age[, 2], m = "BH"),
p.adjust(pvs.age[, 3], m = "BH"))
apply(pv.bh < 0.05, 2, sum, na.rm = T)
## [1] 32 33 35
for (i in 1:3) {
pv.thr = max(pvs.age[pv.bh[, i] < 0.05, i], na.rm = T)
print(sum(pvs.age[, 3 + i] <= pv.thr, na.rm = T)/sum(pv.bh[, i] < 0.05,
na.rm = T))
}
## [1] 0.09375
## [1] 0.09091
## [1] 0.1429