library(MASS)
setwd("/home/mazin/teaching/R/2014")
d = as.matrix(read.table("hw.data/5.hs.cnts.txt"))
meta = read.table("hw.data/5.hs.meta.txt", stringsAsFactors = F)
rin = meta$rin
sex = meta$sex
age = meta$age
age = log(age)

d = d[apply(d < 20, 1, sum) < 20, ]
dim(d)
## [1] 10432    40

pvs.age = matrix(ncol = 6, nrow = 100)
pvs.age[] = NA
colnames(pvs.age) = c("lm", "glm", "glm.nb", "lm.sh", "glm.sh", "glm.nb.sh")

totals = apply(d, 2, sum)
dn = round(sweep(d, 2, totals/mean(totals), "/"))
apply(dn, 2, sum)
##      c1      c2      c3      c4      c5      c6      c7      c8      c9 
## 3656830 3657009 3656963 3656949 3656479 3656920 3657056 3657061 3656987 
##     c10     c11     c12     c13     c14     c15     c16     c17     c18 
## 3656992 3657083 3657186 3655982 3657025 3657091 3657041 3657026 3656999 
##     c19     c20     c21     c22     c23     c24     c25     c26     c27 
## 3657029 3656931 3656961 3657050 3657026 3657090 3657085 3657001 3656951 
##     c28     c29     c30     c31     c32     c33     c34     c35     c36 
## 3657135 3657077 3657629 3657108 3656950 3656898 3656982 3657120 3656772 
##     c37     c38     c39     c40 
## 3657015 3656881 3657044 3657121

options(warn = -1)
for (i in 1:100) {
    # cat('\r',i)
    x = d[i, ]
    model = x ~ rin + sex + age + sex:age
    try({
        pvs.age[i, 1:3] = c(anova(lm(model))[3, 5], max(anova(glm(model, family = "quasipoisson"), 
            test = "Chisq")[4, 5], anova(glm(model, family = "poisson"), test = "Chisq")[4, 
            5]), anova(glm.nb(model), test = "Chisq")[4, 5])
        x = sample(x)
        pvs.age[i, 4:6] = c(anova(lm(model))[3, 5], max(anova(glm(model, family = "quasipoisson"), 
            test = "Chisq")[4, 5], anova(glm(model, family = "poisson"), test = "Chisq")[4, 
            5]), anova(glm.nb(model), test = "Chisq")[4, 5])
    }, silent = T)
}

pv.bh = cbind(p.adjust(pvs.age[, 1], m = "BH"), p.adjust(pvs.age[, 2], m = "BH"), 
    p.adjust(pvs.age[, 3], m = "BH"))
apply(pv.bh < 0.05, 2, sum, na.rm = T)
## [1] 32 33 35

for (i in 1:3) {
    pv.thr = max(pvs.age[pv.bh[, i] < 0.05, i], na.rm = T)
    print(sum(pvs.age[, 3 + i] <= pv.thr, na.rm = T)/sum(pv.bh[, i] < 0.05, 
        na.rm = T))
}
## [1] 0.09375
## [1] 0.09091
## [1] 0.1429