Ch12_Resampling_statistics_and

par(ask=TRUE)

# Listing 12.1 - t-test vs. oneway permutation test for the hypothetical data
library(coin); library(MASS)

## Loading required package: survival

score <- c(40, 57, 45, 55, 58, 57, 64, 55, 62, 65)
treatment <- factor(c(rep("A",5), rep("B",5)))
mydata <- data.frame(treatment, score)
t.test(score~treatment, data=mydata, var.equal=TRUE)

## 
##  Two Sample t-test
## 
## data:  score by treatment
## t = -2.345, df = 8, p-value = 0.04705
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -19.0405455  -0.1594545
## sample estimates:
## mean in group A mean in group B 
##            51.0            60.6

oneway_test(score~treatment, data=mydata, distribution="exact")

## 
##  Exact Two-Sample Fisher-Pitman Permutation Test
## 
## data:  score by treatment (A, B)
## Z = -1.9147, p-value = 0.07143
## alternative hypothesis: true mu is not equal to 0

# Wilcoxon Mann-Whitney U test
UScrime <- transform(UScrime, So = factor(So))
wilcox_test(Prob ~ So, data=UScrime, distribution="exact")

## 
##  Exact Wilcoxon-Mann-Whitney Test
## 
## data:  Prob by So (0, 1)
## Z = -3.7493, p-value = 8.488e-05
## alternative hypothesis: true mu is not equal to 0

# k sample test
library(multcomp)

## Loading required package: mvtnorm

## Loading required package: TH.data

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

set.seed(1234) # make results reproducible
oneway_test(response~trt, data=cholesterol, 
            distribution=approximate(B=9999))

## 
##  Approximative K-Sample Fisher-Pitman Permutation Test
## 
## data:  response by
##   trt (1time, 2times, 4times, drugD, drugE)
## chi-squared = 36.381, p-value < 2.2e-16

# independence in contingency tables
library(coin)
library(vcd)

## Loading required package: grid

Arthritis <- transform(Arthritis, 
                       Improved = as.factor(as.numeric(Improved)))
set.seed(1234)
chisq_test(Treatment~Improved, data=Arthritis,
           distribution=approximate(B=9999))

## 
##  Approximative Pearson Chi-Squared Test
## 
## data:  Treatment by Improved (1, 2, 3)
## chi-squared = 13.055, p-value = 0.0018

# independence between numeric variables
states <- as.data.frame(state.x77)
set.seed(1234)
spearman_test(Illiteracy ~ Murder, data=states, 
              distribution=approximate(B=9999))

## 
##  Approximative Spearman Correlation Test
## 
## data:  Illiteracy by Murder
## Z = 4.7065, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0

# dependent 2-sample and k-sample tests
library(coin)
library(MASS)
wilcoxsign_test(U1 ~ U2, data=UScrime, distribution="exact")

## 
##  Exact Wilcoxon-Pratt Signed-Rank Test
## 
## data:  y by x (pos, neg) 
##   stratified by block
## Z = 5.9691, p-value = 1.421e-14
## alternative hypothesis: true mu is not equal to 0

# Listing 12.2 - Permutation tests for simple linear regression
library(lmPerm)
set.seed(1234)
fit <- lmp(weight ~ height, data=women, perm="Prob")

## [1] "Settings:  unique SS : numeric variables centered"

summary(fit)

## 
## Call:
## lmp(formula = weight ~ height, data = women, perm = "Prob")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7333 -1.1333 -0.3833  0.7417  3.1167 
## 
## Coefficients:
##        Estimate Iter Pr(Prob)    
## height     3.45 5000   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.525 on 13 degrees of freedom
## Multiple R-Squared: 0.991,   Adjusted R-squared: 0.9903 
## F-statistic:  1433 on 1 and 13 DF,  p-value: 1.091e-14

# Listing 12.3 - Permutation tests for polynomial regression
library(lmPerm)
set.seed(1234)
fit <- lmp(weight ~ height + I(height^2), data=women, perm="Prob")

## [1] "Settings:  unique SS : numeric variables centered"

summary(fit)

## 
## Call:
## lmp(formula = weight ~ height + I(height^2), data = women, perm = "Prob")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.509405 -0.296105 -0.009405  0.286151  0.597059 
## 
## Coefficients:
##             Estimate Iter Pr(Prob)    
## height      -7.34832 5000   <2e-16 ***
## I(height^2)  0.08306 5000   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3841 on 12 degrees of freedom
## Multiple R-Squared: 0.9995,  Adjusted R-squared: 0.9994 
## F-statistic: 1.139e+04 on 2 and 12 DF,  p-value: < 2.2e-16

# Listing 12.4 - Permutation tests for multiple regression
library(lmPerm)
set.seed(1234)
states <- as.data.frame(state.x77)
fit <- lmp(Murder ~ Population + Illiteracy+Income+Frost,data=states, perm="Prob")

## [1] "Settings:  unique SS : numeric variables centered"

summary(fit)

## 
## Call:
## lmp(formula = Murder ~ Population + Illiteracy + Income + Frost, 
##     data = states, perm = "Prob")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.79597 -1.64946 -0.08112  1.48150  7.62104 
## 
## Coefficients:
##             Estimate Iter Pr(Prob)    
## Population 2.237e-04   51   1.0000    
## Illiteracy 4.143e+00 5000   0.0004 ***
## Income     6.442e-05   51   1.0000    
## Frost      5.813e-04   51   0.8627    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.535 on 45 degrees of freedom
## Multiple R-Squared: 0.567,   Adjusted R-squared: 0.5285 
## F-statistic: 14.73 on 4 and 45 DF,  p-value: 9.133e-08

# Listing 12.5 - Permutation test for One-Way ANOVA
library(lmPerm)
library(multcomp)
set.seed(1234)
fit <- lmp(response ~ trt, data=cholesterol, perm="Prob")

## [1] "Settings:  unique SS "

anova(fit)

## Analysis of Variance Table
## 
## Response: response
##           Df R Sum Sq R Mean Sq Iter  Pr(Prob)    
## trt        4  1351.37    337.84 5000 < 2.2e-16 ***
## Residuals 45   468.75     10.42                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#  Listing 12.6 - Permutation test for One-Way ANCOVA
library(lmPerm)
set.seed(1234)
fit <- lmp(weight ~ gesttime + dose, data=litter, perm="Prob")

## [1] "Settings:  unique SS : numeric variables centered"

anova(fit)

## Analysis of Variance Table
## 
## Response: weight
##           Df R Sum Sq R Mean Sq Iter Pr(Prob)    
## gesttime   1   161.49   161.493 5000   0.0006 ***
## dose       3   137.12    45.708 5000   0.0392 *  
## Residuals 69  1151.27    16.685                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Listing 12.7 - Permutation test for Two-way ANOVA
library(lmPerm)
set.seed(1234)
fit <- lmp(len ~ supp*dose, data=ToothGrowth, perm="Prob")

## [1] "Settings:  unique SS : numeric variables centered"

anova(fit)

## Analysis of Variance Table
## 
## Response: len
##           Df R Sum Sq R Mean Sq Iter Pr(Prob)    
## supp       1   205.35    205.35 5000  < 2e-16 ***
## dose       1  2224.30   2224.30 5000  < 2e-16 ***
## supp:dose  1    88.92     88.92 2032  0.04724 *  
## Residuals 56   933.63     16.67                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# bootstrapping a single statistic (R2)
rsq <- function(formula, data, indices) {
  d <- data[indices,]
  fit <- lm(formula, data=d)
  return(summary(fit)$r.square)
} 

library(boot)

## 
## Attaching package: 'boot'

## The following object is masked from 'package:survival':
## 
##     aml

set.seed(1234)
results <- boot(data=mtcars, statistic=rsq, 
                R=1000, formula=mpg~wt+disp)
print(results)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = mtcars, statistic = rsq, R = 1000, formula = mpg ~ 
##     wt + disp)
## 
## 
## Bootstrap Statistics :
##      original    bias    std. error
## t1* 0.7809306 0.0133367  0.05068926

plot(results)

boot.ci(results, type=c("perc", "bca"))

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = results, type = c("perc", "bca"))
## 
## Intervals : 
## Level     Percentile            BCa          
## 95%   ( 0.6838,  0.8833 )   ( 0.6344,  0.8549 )  
## Calculations and Intervals on Original Scale
## Some BCa intervals may be unstable

# bootstrapping several statistics (regression coefficients)
bs <- function(formula, data, indices) {                
  d <- data[indices,]                                    
  fit <- lm(formula, data=d)                                                                                                
  return(coef(fit))                                    
}
library(boot)
set.seed(1234)
results <- boot(data=mtcars, statistic=bs,             
                R=1000, formula=mpg~wt+disp) 

print(results)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = mtcars, statistic = bs, R = 1000, formula = mpg ~ 
##     wt + disp)
## 
## 
## Bootstrap Statistics :
##        original        bias    std. error
## t1* 34.96055404  0.1378732942 2.485756673
## t2* -3.35082533 -0.0539040965 1.170427539
## t3* -0.01772474 -0.0001208075 0.008786803

plot(results, index=2)

boot.ci(results, type="bca", index=2)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = results, type = "bca", index = 2)
## 
## Intervals : 
## Level       BCa          
## 95%   (-5.655, -1.194 )  
## Calculations and Intervals on Original Scale

boot.ci(results, type="bca", index=3)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = results, type = "bca", index = 3)
## 
## Intervals : 
## Level       BCa          
## 95%   (-0.0331,  0.0010 )  
## Calculations and Intervals on Original Scale

Ch12_Resampling_statistics_and_bootstrapping.R

jaehochang

Mon Jan 8 23:04:55 2018