EPI 288 Lecture 1: Multiple testing

P-value adjustment methods available in R

p.adjust(p, method = p.adjust.methods, n = length(p))

# p.adjust.methods available
# c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")

Load data in SAS format

library(sas7bdat)
vasdata <- read.sas7bdat("./vasdata.sas7bdat")

1. Create multiple cross tables and test

## Create 50 cross tables
tabs1 <- lapply(vasdata[,grep("gened", names(vasdata))],
                function(X) {
                    xtabs(~ X + vasdata$vasc)
                })

## Test 50 tables with Chi-squared test
chisq1 <- lapply(tabs1, chisq.test, correct = F)

## Obtain 50 p-values, and reorder by p-values
p.val1 <- data.frame(raw_p = sapply(chisq1, function(X) X$p.val))
p.val1 <- p.val1[order(p.val1$raw_p),, drop = F]

## List ones significant at < 0.05 (No adjustment)
p.val1[p.val1$raw_p < 0.05,, drop = F]
               raw_p
gened29 0.0000004424
gened6  0.0018676504
gened8  0.0019300701
gened4  0.0029006617
gened47 0.0085328883
gened7  0.0112625965
gened48 0.0207197827
gened3  0.0239369732
gened21 0.0473953101
gened46 0.0485281669

2. Adjust by various methods

## Define function
AdjustP <- function(DF, alpha = 0.05) {

    DF$step.thres <- alpha/rev(seq_len(nrow(DF)))
    DF$step.comp  <- as.numeric(DF[,1] < DF$step.thres)
    DF$bon_p      <- p.adjust(DF[,1], method = "bonferroni")
    DF$holm_p     <- p.adjust(DF[,1], method = "holm")
    DF$hoch_p     <- p.adjust(DF[,1], method = "hochberg")
    DF$fdr_p      <- p.adjust(DF[,1], method = "fdr")

    DF
}
p.val2 <- AdjustP(p.val1)

## head(round(p.val2, 5), 15)
round(p.val2, 5)
          raw_p step.thres step.comp   bon_p  holm_p  hoch_p   fdr_p
gened29 0.00000    0.00100         1 0.00002 0.00002 0.00002 0.00002
gened6  0.00187    0.00102         0 0.09338 0.09151 0.09151 0.03217
gened8  0.00193    0.00104         0 0.09650 0.09264 0.09264 0.03217
gened4  0.00290    0.00106         0 0.14503 0.13633 0.13633 0.03626
gened47 0.00853    0.00109         0 0.42664 0.39251 0.39251 0.08533
gened7  0.01126    0.00111         0 0.56313 0.50682 0.50682 0.09385
gened48 0.02072    0.00114         0 1.00000 0.91167 0.91167 0.14800
gened3  0.02394    0.00116         0 1.00000 1.00000 0.99914 0.14961
gened21 0.04740    0.00119         0 1.00000 1.00000 0.99914 0.24264
gened46 0.04853    0.00122         0 1.00000 1.00000 0.99914 0.24264
gened22 0.06336    0.00125         0 1.00000 1.00000 0.99914 0.27928
gened36 0.06703    0.00128         0 1.00000 1.00000 0.99914 0.27928
gened44 0.15361    0.00132         0 1.00000 1.00000 0.99914 0.59080
gened39 0.21751    0.00135         0 1.00000 1.00000 0.99914 0.74641
gened12 0.23851    0.00139         0 1.00000 1.00000 0.99914 0.74641
gened16 0.24455    0.00143         0 1.00000 1.00000 0.99914 0.74641
gened35 0.25378    0.00147         0 1.00000 1.00000 0.99914 0.74641
gened25 0.27914    0.00152         0 1.00000 1.00000 0.99914 0.77540
gened20 0.36547    0.00156         0 1.00000 1.00000 0.99914 0.96175
gened45 0.40950    0.00161         0 1.00000 1.00000 0.99914 0.99308
gened13 0.43750    0.00167         0 1.00000 1.00000 0.99914 0.99308
gened50 0.47530    0.00172         0 1.00000 1.00000 0.99914 0.99308
gened42 0.50005    0.00179         0 1.00000 1.00000 0.99914 0.99308
gened41 0.50611    0.00185         0 1.00000 1.00000 0.99914 0.99308
gened10 0.57400    0.00192         0 1.00000 1.00000 0.99914 0.99308
gened2  0.58750    0.00200         0 1.00000 1.00000 0.99914 0.99308
gened49 0.61325    0.00208         0 1.00000 1.00000 0.99914 0.99308
gened37 0.64708    0.00217         0 1.00000 1.00000 0.99914 0.99308
gened5  0.66401    0.00227         0 1.00000 1.00000 0.99914 0.99308
gened31 0.66418    0.00238         0 1.00000 1.00000 0.99914 0.99308
gened24 0.67736    0.00250         0 1.00000 1.00000 0.99914 0.99308
gened43 0.69241    0.00263         0 1.00000 1.00000 0.99914 0.99308
gened33 0.70131    0.00278         0 1.00000 1.00000 0.99914 0.99308
gened1  0.72207    0.00294         0 1.00000 1.00000 0.99914 0.99308
gened27 0.74887    0.00312         0 1.00000 1.00000 0.99914 0.99308
gened11 0.76321    0.00333         0 1.00000 1.00000 0.99914 0.99308
gened15 0.76794    0.00357         0 1.00000 1.00000 0.99914 0.99308
gened40 0.80843    0.00385         0 1.00000 1.00000 0.99914 0.99308
gened17 0.80892    0.00417         0 1.00000 1.00000 0.99914 0.99308
gened19 0.81796    0.00455         0 1.00000 1.00000 0.99914 0.99308
gened30 0.82640    0.00500         0 1.00000 1.00000 0.99914 0.99308
gened28 0.89226    0.00556         0 1.00000 1.00000 0.99914 0.99308
gened32 0.89793    0.00625         0 1.00000 1.00000 0.99914 0.99308
gened23 0.91777    0.00714         0 1.00000 1.00000 0.99914 0.99308
gened9  0.92425    0.00833         0 1.00000 1.00000 0.99914 0.99308
gened14 0.92434    0.01000         0 1.00000 1.00000 0.99914 0.99308
gened38 0.93350    0.01250         0 1.00000 1.00000 0.99914 0.99308
gened18 0.97901    0.01667         0 1.00000 1.00000 0.99914 0.99914
gened34 0.99407    0.02500         0 1.00000 1.00000 0.99914 0.99914
gened26 0.99914    0.05000         0 1.00000 1.00000 0.99914 0.99914

3. Repeat logistic regression 50 times

logit3 <- lapply(vasdata[,grep("gened", names(vasdata))],
                function(Var) {
                    glm(vasc ~ Var + age + bmi, data = vasdata, family = binomial)
                })

p.val3 <- data.frame(raw_p = sapply(logit3, function(X) summary(X)$coef["Var",4]))
p.val3 <- p.val3[order(p.val3$raw_p),, drop = F]

## List ones significant at < 0.05 (No adjustment)
p.val3[p.val3$raw_p < 0.05,, drop = F]
               raw_p
gened29 0.0000004686
gened6  0.0016120305
gened8  0.0017685242
gened4  0.0033829641
gened7  0.0066777480
gened47 0.0079777148
gened48 0.0198316797
gened3  0.0226366388
gened46 0.0368833353
gened36 0.0470096753

4. Adjust by various methods

p.val4 <- AdjustP(p.val3)
## head(round(p.val4, 5), 15)
round(p.val4, 5)
          raw_p step.thres step.comp   bon_p  holm_p  hoch_p   fdr_p
gened29 0.00000    0.00100         1 0.00002 0.00002 0.00002 0.00002
gened6  0.00161    0.00102         0 0.08060 0.07899 0.07899 0.02948
gened8  0.00177    0.00104         0 0.08843 0.08489 0.08489 0.02948
gened4  0.00338    0.00106         0 0.16915 0.15900 0.15900 0.04229
gened7  0.00668    0.00109         0 0.33389 0.30718 0.30718 0.06648
gened47 0.00798    0.00111         0 0.39889 0.35900 0.35900 0.06648
gened48 0.01983    0.00114         0 0.99158 0.87259 0.87259 0.14148
gened3  0.02264    0.00116         0 1.00000 0.97338 0.97338 0.14148
gened46 0.03688    0.00119         0 1.00000 1.00000 0.98195 0.20491
gened36 0.04701    0.00122         0 1.00000 1.00000 0.98195 0.23212
gened21 0.05511    0.00125         0 1.00000 1.00000 0.98195 0.23212
gened22 0.05571    0.00128         0 1.00000 1.00000 0.98195 0.23212
gened16 0.18991    0.00132         0 1.00000 1.00000 0.98195 0.69868
gened44 0.20331    0.00135         0 1.00000 1.00000 0.98195 0.69868
gened35 0.20960    0.00139         0 1.00000 1.00000 0.98195 0.69868
gened39 0.22594    0.00143         0 1.00000 1.00000 0.98195 0.70605
gened25 0.25485    0.00147         0 1.00000 1.00000 0.98195 0.74957
gened12 0.28102    0.00152         0 1.00000 1.00000 0.98195 0.78062
gened50 0.39242    0.00156         0 1.00000 1.00000 0.98195 0.98195
gened20 0.42216    0.00161         0 1.00000 1.00000 0.98195 0.98195
gened13 0.42431    0.00167         0 1.00000 1.00000 0.98195 0.98195
gened45 0.43784    0.00172         0 1.00000 1.00000 0.98195 0.98195
gened41 0.53360    0.00179         0 1.00000 1.00000 0.98195 0.98195
gened10 0.55559    0.00185         0 1.00000 1.00000 0.98195 0.98195
gened24 0.59046    0.00192         0 1.00000 1.00000 0.98195 0.98195
gened2  0.63007    0.00200         0 1.00000 1.00000 0.98195 0.98195
gened42 0.63233    0.00208         0 1.00000 1.00000 0.98195 0.98195
gened37 0.65481    0.00217         0 1.00000 1.00000 0.98195 0.98195
gened5  0.67384    0.00227         0 1.00000 1.00000 0.98195 0.98195
gened43 0.68872    0.00238         0 1.00000 1.00000 0.98195 0.98195
gened19 0.72229    0.00250         0 1.00000 1.00000 0.98195 0.98195
gened49 0.72381    0.00263         0 1.00000 1.00000 0.98195 0.98195
gened15 0.73124    0.00278         0 1.00000 1.00000 0.98195 0.98195
gened33 0.78015    0.00294         0 1.00000 1.00000 0.98195 0.98195
gened31 0.82492    0.00312         0 1.00000 1.00000 0.98195 0.98195
gened30 0.83777    0.00333         0 1.00000 1.00000 0.98195 0.98195
gened40 0.84583    0.00357         0 1.00000 1.00000 0.98195 0.98195
gened17 0.84787    0.00385         0 1.00000 1.00000 0.98195 0.98195
gened11 0.85076    0.00417         0 1.00000 1.00000 0.98195 0.98195
gened27 0.85999    0.00455         0 1.00000 1.00000 0.98195 0.98195
gened1  0.86029    0.00500         0 1.00000 1.00000 0.98195 0.98195
gened32 0.86694    0.00556         0 1.00000 1.00000 0.98195 0.98195
gened9  0.87691    0.00625         0 1.00000 1.00000 0.98195 0.98195
gened28 0.88974    0.00714         0 1.00000 1.00000 0.98195 0.98195
gened26 0.93309    0.00833         0 1.00000 1.00000 0.98195 0.98195
gened34 0.93880    0.01000         0 1.00000 1.00000 0.98195 0.98195
gened18 0.94674    0.01250         0 1.00000 1.00000 0.98195 0.98195
gened38 0.96528    0.01667         0 1.00000 1.00000 0.98195 0.98195
gened23 0.96750    0.02500         0 1.00000 1.00000 0.98195 0.98195
gened14 0.98195    0.05000         0 1.00000 1.00000 0.98195 0.98195

5. Logistic regression of permutated data

Create permutation p-values

## Get sample size: 1664
sample.size <- nrow(vasdata)

## Generate permutated patient numbers
perm.list <- lapply(1:100,
                    function(i) sample(x = seq(sample.size), size = sample.size, replace = FALSE))

## Permutated the vasc variable only, and recombine with the rest of the data frame (list of 100 datasets)
perm.data <- lapply(perm.list,
                    function(perm) {                                 # Variables named gened* age bmi only
                        cbind(vasc = vasdata[perm, "vasc"], vasdata[,grep("gened|age|bmi", names(vasdata))])
                    })

## Repeat logistic regression 50 times (50 genes) for each of 100 permutated samples
library(parallel)       # Use MultiCoreListAPPLY in parallel package (2x faster)

system.time(
 list.of.50.p.values <-
    mclapply(perm.data,         # 100 permutated datasets in a list
             function(dat) {

                 ## Run below for each permutated dataset
                 p.values <-
                     sapply(dat[,grep("gened", names(dat))],
                            function(Var) {

                                ## Run logistic regression for each gened* within each permutated dataset
                                res.glm <- glm(vasc ~ Var + age + bmi, data = dat, family = binomial)
                                summary(res.glm)$coef["Var", 4]   # Extract p-value from each logistic regression

                            })

                 p.values <- sort(p.values)     # 50 p-values sorted
                 p.values
             })
    )
   user  system elapsed 
 27.030   0.776  27.891 

Distribution of smallest p-values under null

## Distribution of smallest p-values from each permutated sample
smallest.100.p.values <- sapply(list.of.50.p.values, function(X) min(X))
plot(density(smallest.100.p.values), log = "x")

plot of chunk unnamed-chunk-12

summary(smallest.100.p.values)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00007 0.00743 0.01520 0.02220 0.03270 0.11100 

Permutation test using smallest permutation p-values

## Count permutation p-values (p-values under null) smaller than raw p-values
p.comparison  <- outer(X = smallest.100.p.values, Y = p.val4$raw_p, FUN = "<")
p.val4$perm_p <- apply(p.comparison, 2, sum) / 100
head(round(p.val4, 5), 15)
          raw_p step.thres step.comp   bon_p  holm_p  hoch_p   fdr_p perm_p
gened29 0.00000    0.00100         1 0.00002 0.00002 0.00002 0.00002   0.00
gened6  0.00161    0.00102         0 0.08060 0.07899 0.07899 0.02948   0.09
gened8  0.00177    0.00104         0 0.08843 0.08489 0.08489 0.02948   0.10
gened4  0.00338    0.00106         0 0.16915 0.15900 0.15900 0.04229   0.13
gened7  0.00668    0.00109         0 0.33389 0.30718 0.30718 0.06648   0.24
gened47 0.00798    0.00111         0 0.39889 0.35900 0.35900 0.06648   0.26
gened48 0.01983    0.00114         0 0.99158 0.87259 0.87259 0.14148   0.57
gened3  0.02264    0.00116         0 1.00000 0.97338 0.97338 0.14148   0.62
gened46 0.03688    0.00119         0 1.00000 1.00000 0.98195 0.20491   0.80
gened36 0.04701    0.00122         0 1.00000 1.00000 0.98195 0.23212   0.87
gened21 0.05511    0.00125         0 1.00000 1.00000 0.98195 0.23212   0.90
gened22 0.05571    0.00128         0 1.00000 1.00000 0.98195 0.23212   0.92
gened16 0.18991    0.00132         0 1.00000 1.00000 0.98195 0.69868   1.00
gened44 0.20331    0.00135         0 1.00000 1.00000 0.98195 0.69868   1.00
gened35 0.20960    0.00139         0 1.00000 1.00000 0.98195 0.69868   1.00

Step-down permutation test

Variables are removed from the set of null p-values at each step.

Multiple tests with continuous measures

This time multtest packages is used.

library(gdata)
nutrffq <- read.xls("./nutrffq_dat.xls")        # This file was created in SAS.

## Perform multiple t-tests and extract p-values
res.t.tests <- lapply(nutrffq[,c(2,12:36)], function(X) t.test(X ~ nutrffq$bmi25, var.equal = T))
raw_p <- sapply(res.t.tests, function(X) X$p.val)

## Perform adjustment
library(multtest)
adj_p <- mt.rawp2adjp(raw_p, proc = c("Bonferroni", "Holm", "SidakSS", "Hochberg", "BH")) # BH = FDR
rownames(adj_p$adjp) <- names(raw_p)[adj_p$index]

## Order by raw p
round(adj_p$adjp, 5)
           rawp Bonferroni    Holm SidakSS Hochberg      BH
FKCAL   0.00000    0.00000 0.00000 0.00000  0.00000 0.00000
aprot   0.00000    0.00010 0.00010 0.00010  0.00010 0.00005
asat    0.00005    0.00124 0.00114 0.00124  0.00114 0.00041
atthiri 0.00010    0.00271 0.00239 0.00270  0.00239 0.00068
atotfat 0.00037    0.00967 0.00818 0.00962  0.00818 0.00189
aoleic  0.00044    0.01133 0.00915 0.01127  0.00915 0.00189
atca    0.00066    0.01713 0.01318 0.01699  0.01318 0.00245
atiron  0.00096    0.02508 0.01832 0.02478  0.01832 0.00313
acarb   0.00221    0.05751 0.03981 0.05595  0.03981 0.00639
achol   0.00300    0.07810 0.05107 0.07524  0.05107 0.00781
atvc    0.02030    0.52788 0.32485 0.41334  0.32485 0.04799
advaiu  0.04485    1.00000 0.67276 0.69672  0.67276 0.09683
afiber  0.04842    1.00000 0.67783 0.72482  0.67783 0.09683
adthiam 0.06133    1.00000 0.79724 0.80708  0.79724 0.11389
adniac  0.07561    1.00000 0.90737 0.87052  0.90737 0.13106
alinolc 0.16792    1.00000 1.00000 0.99160  0.97620 0.27287
adca    0.22806    1.00000 1.00000 0.99881  0.97620 0.34879
adribo  0.29535    1.00000 1.00000 0.99989  0.97620 0.42662
advare  0.32609    1.00000 1.00000 0.99997  0.97620 0.44623
TRT     0.39006    1.00000 1.00000 1.00000  0.97620 0.50293
advc    0.40622    1.00000 1.00000 1.00000  0.97620 0.50293
adiron  0.75213    1.00000 1.00000 1.00000  0.97620 0.88888
adna    0.82489    1.00000 1.00000 1.00000  0.97620 0.90490
adk     0.83529    1.00000 1.00000 1.00000  0.97620 0.90490
adphos  0.97174    1.00000 1.00000 1.00000  0.97620 0.97620
atvaiu  0.97620    1.00000 1.00000 1.00000  0.97620 0.97620