Untitled.R

# R in Action (2nd ed): Chapter 7                                     
# Basic statistics                                                    
# requires packages npmc, ggm, gmodels, vcd, Hmisc,                   
#                   pastecs, psych, doBy to be installed              
# install.packages(c("ggm", "gmodels", "vcd", "Hmisc",                
#                    "pastecs", "psych", "doBy"))

mt <- mtcars[c("mpg", "hp", "wt", "am")]
head(mt)

##                    mpg  hp    wt am
## Mazda RX4         21.0 110 2.620  1
## Mazda RX4 Wag     21.0 110 2.875  1
## Datsun 710        22.8  93 2.320  1
## Hornet 4 Drive    21.4 110 3.215  0
## Hornet Sportabout 18.7 175 3.440  0
## Valiant           18.1 105 3.460  0

# Listing 7.1 - Descriptive stats via summary
mt <- mtcars[c("mpg", "hp", "wt", "am")]
summary(mt)

##       mpg              hp              wt              am        
##  Min.   :10.40   Min.   : 52.0   Min.   :1.513   Min.   :0.0000  
##  1st Qu.:15.43   1st Qu.: 96.5   1st Qu.:2.581   1st Qu.:0.0000  
##  Median :19.20   Median :123.0   Median :3.325   Median :0.0000  
##  Mean   :20.09   Mean   :146.7   Mean   :3.217   Mean   :0.4062  
##  3rd Qu.:22.80   3rd Qu.:180.0   3rd Qu.:3.610   3rd Qu.:1.0000  
##  Max.   :33.90   Max.   :335.0   Max.   :5.424   Max.   :1.0000

# Listing 7.2 - descriptive stats via sapply
mystats <- function(x, na.omit=FALSE){
  if (na.omit)
    x <- x[!is.na(x)]
  m <- mean(x)
  n <- length(x)
  s <- sd(x)
  skew <- sum((x-m)^3/s^3)/n
  kurt <- sum((x-m)^4/s^4)/n - 3
  return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
}

myvars <- c("mpg", "hp", "wt")
sapply(mtcars[myvars], mystats)

##                mpg          hp          wt
## n        32.000000  32.0000000 32.00000000
## mean     20.090625 146.6875000  3.21725000
## stdev     6.026948  68.5628685  0.97845744
## skew      0.610655   0.7260237  0.42314646
## kurtosis -0.372766  -0.1355511 -0.02271075

# Listing 7.3 - Descriptive stats via describe (Hmisc)
library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars])

## mtcars[myvars] 
## 
##  3  Variables      32  Observations
## ---------------------------------------------------------------------------
## mpg 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       25    0.999    20.09    6.796    12.00    14.34 
##      .25      .50      .75      .90      .95 
##    15.43    19.20    22.80    30.09    31.30 
## 
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9
## ---------------------------------------------------------------------------
## hp 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       22    0.997    146.7    77.04    63.65    66.00 
##      .25      .50      .75      .90      .95 
##    96.50   123.00   180.00   243.50   253.55 
## 
## lowest :  52  62  65  66  91, highest: 215 230 245 264 335
## ---------------------------------------------------------------------------
## wt 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       29    0.999    3.217    1.089    1.736    1.956 
##      .25      .50      .75      .90      .95 
##    2.581    3.325    3.610    4.048    5.293 
## 
## lowest : 1.513 1.615 1.835 1.935 2.140, highest: 3.845 4.070 5.250 5.345 5.424
## ---------------------------------------------------------------------------

# Listing 7.,4 - Descriptive stats via stat.desc (pastecs)
library(pastecs)
myvars <- c("mpg", "hp", "wt")
stat.desc(mtcars[myvars])

##                      mpg           hp          wt
## nbr.val       32.0000000   32.0000000  32.0000000
## nbr.null       0.0000000    0.0000000   0.0000000
## nbr.na         0.0000000    0.0000000   0.0000000
## min           10.4000000   52.0000000   1.5130000
## max           33.9000000  335.0000000   5.4240000
## range         23.5000000  283.0000000   3.9110000
## sum          642.9000000 4694.0000000 102.9520000
## median        19.2000000  123.0000000   3.3250000
## mean          20.0906250  146.6875000   3.2172500
## SE.mean        1.0654240   12.1203173   0.1729685
## CI.mean.0.95   2.1729465   24.7195501   0.3527715
## var           36.3241028 4700.8669355   0.9573790
## std.dev        6.0269481   68.5628685   0.9784574
## coef.var       0.2999881    0.4674077   0.3041285

# Listing 7.5 - Descriptive stats via describe (psych)
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars])

##     vars  n   mean    sd median trimmed   mad   min    max  range skew
## mpg    1 32  20.09  6.03  19.20   19.70  5.41 10.40  33.90  23.50 0.61
## hp     2 32 146.69 68.56 123.00  141.19 77.10 52.00 335.00 283.00 0.73
## wt     3 32   3.22  0.98   3.33    3.15  0.77  1.51   5.42   3.91 0.42
##     kurtosis    se
## mpg    -0.37  1.07
## hp     -0.14 12.12
## wt     -0.02  0.17

# Listing 7.6 - Descriptive stats by group with aggregate
myvars <- c("mpg", "hp", "wt")
aggregate(mtcars[myvars], by=list(am=mtcars$am), mean)

##   am      mpg       hp       wt
## 1  0 17.14737 160.2632 3.768895
## 2  1 24.39231 126.8462 2.411000

aggregate(mtcars[myvars], by=list(am=mtcars$am), sd)

##   am      mpg       hp        wt
## 1  0 3.833966 53.90820 0.7774001
## 2  1 6.166504 84.06232 0.6169816

# Listing 7.7 - Descriptive stats by group via by
dstats <- function(x)sapply(x, mystats)
myvars <- c("mpg", "hp", "wt")
by(mtcars[myvars], mtcars$am, dstats)

## mtcars$am: 0
##                  mpg           hp         wt
## n        19.00000000  19.00000000 19.0000000
## mean     17.14736842 160.26315789  3.7688947
## stdev     3.83396639  53.90819573  0.7774001
## skew      0.01395038  -0.01422519  0.9759294
## kurtosis -0.80317826  -1.20969733  0.1415676
## -------------------------------------------------------- 
## mtcars$am: 1
##                  mpg          hp         wt
## n        13.00000000  13.0000000 13.0000000
## mean     24.39230769 126.8461538  2.4110000
## stdev     6.16650381  84.0623243  0.6169816
## skew      0.05256118   1.3598859  0.2103128
## kurtosis -1.45535200   0.5634635 -1.1737358

# Listing 7.8 - Descriptive stats by group via summaryBy
library(doBy)
summaryBy(mpg+hp+wt~am, data=mtcars, FUN=mystats)

##   am mpg.n mpg.mean mpg.stdev   mpg.skew mpg.kurtosis hp.n  hp.mean
## 1  0    19 17.14737  3.833966 0.01395038   -0.8031783   19 160.2632
## 2  1    13 24.39231  6.166504 0.05256118   -1.4553520   13 126.8462
##   hp.stdev     hp.skew hp.kurtosis wt.n  wt.mean  wt.stdev   wt.skew
## 1 53.90820 -0.01422519  -1.2096973   19 3.768895 0.7774001 0.9759294
## 2 84.06232  1.35988586   0.5634635   13 2.411000 0.6169816 0.2103128
##   wt.kurtosis
## 1   0.1415676
## 2  -1.1737358

# Listing 7.9 - Descriptive stats by group via describe.by (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
describeBy(mtcars[myvars], list(am=mtcars$am))

## 
##  Descriptive statistics by group 
## am: 0
##     vars  n   mean    sd median trimmed   mad   min    max  range  skew
## mpg    1 19  17.15  3.83  17.30   17.12  3.11 10.40  24.40  14.00  0.01
## hp     2 19 160.26 53.91 175.00  161.06 77.10 62.00 245.00 183.00 -0.01
## wt     3 19   3.77  0.78   3.52    3.75  0.45  2.46   5.42   2.96  0.98
##     kurtosis    se
## mpg    -0.80  0.88
## hp     -1.21 12.37
## wt      0.14  0.18
## -------------------------------------------------------- 
## am: 1
##     vars  n   mean    sd median trimmed   mad   min    max  range skew
## mpg    1 13  24.39  6.17  22.80   24.38  6.67 15.00  33.90  18.90 0.05
## hp     2 13 126.85 84.06 109.00  114.73 63.75 52.00 335.00 283.00 1.36
## wt     3 13   2.41  0.62   2.32    2.39  0.68  1.51   3.57   2.06 0.21
##     kurtosis    se
## mpg    -1.46  1.71
## hp      0.56 23.31
## wt     -1.17  0.17

# summary statistics by group via the reshape package
library(reshape)
dstats <- function(x)(c(n=length(x), mean=mean(x), sd=sd(x)))
dfm <- melt(mtcars, measure.vars=c("mpg", "hp", "wt"), 
            id.vars=c("am", "cyl"))
cast(dfm, am + cyl + variable ~ ., dstats)

##    am cyl variable  n       mean         sd
## 1   0   4      mpg  3  22.900000  1.4525839
## 2   0   4       hp  3  84.666667 19.6553640
## 3   0   4       wt  3   2.935000  0.4075230
## 4   0   6      mpg  4  19.125000  1.6317169
## 5   0   6       hp  4 115.250000  9.1787799
## 6   0   6       wt  4   3.388750  0.1162164
## 7   0   8      mpg 12  15.050000  2.7743959
## 8   0   8       hp 12 194.166667 33.3598379
## 9   0   8       wt 12   4.104083  0.7683069
## 10  1   4      mpg  8  28.075000  4.4838599
## 11  1   4       hp  8  81.875000 22.6554156
## 12  1   4       wt  8   2.042250  0.4093485
## 13  1   6      mpg  3  20.566667  0.7505553
## 14  1   6       hp  3 131.666667 37.5277675
## 15  1   6       wt  3   2.755000  0.1281601
## 16  1   8      mpg  2  15.400000  0.5656854
## 17  1   8       hp  2 299.500000 50.2045815
## 18  1   8       wt  2   3.370000  0.2828427

# frequency tables
library(vcd)

## Loading required package: grid

head(Arthritis)

##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked
## 6 23   Treated Male  58   Marked

# one way table
mytable <- with(Arthritis, table(Improved))
mytable  # frequencies

## Improved
##   None   Some Marked 
##     42     14     28

prop.table(mytable) # proportions

## Improved
##      None      Some    Marked 
## 0.5000000 0.1666667 0.3333333

prop.table(mytable)*100 # percentages

## Improved
##     None     Some   Marked 
## 50.00000 16.66667 33.33333

# two way table
mytable <- xtabs(~ Treatment+Improved, data=Arthritis)
mytable # frequencies

##          Improved
## Treatment None Some Marked
##   Placebo   29    7      7
##   Treated   13    7     21

margin.table(mytable,1) #row sums

## Treatment
## Placebo Treated 
##      43      41

margin.table(mytable, 2) # column sums

## Improved
##   None   Some Marked 
##     42     14     28

prop.table(mytable) # cell proportions

##          Improved
## Treatment       None       Some     Marked
##   Placebo 0.34523810 0.08333333 0.08333333
##   Treated 0.15476190 0.08333333 0.25000000

prop.table(mytable, 1) # row proportions

##          Improved
## Treatment      None      Some    Marked
##   Placebo 0.6744186 0.1627907 0.1627907
##   Treated 0.3170732 0.1707317 0.5121951

prop.table(mytable, 2) # column proportions

##          Improved
## Treatment      None      Some    Marked
##   Placebo 0.6904762 0.5000000 0.2500000
##   Treated 0.3095238 0.5000000 0.7500000

addmargins(mytable) # add row and column sums to table

##          Improved
## Treatment None Some Marked Sum
##   Placebo   29    7      7  43
##   Treated   13    7     21  41
##   Sum       42   14     28  84

# more complex tables
addmargins(prop.table(mytable))

##          Improved
## Treatment       None       Some     Marked        Sum
##   Placebo 0.34523810 0.08333333 0.08333333 0.51190476
##   Treated 0.15476190 0.08333333 0.25000000 0.48809524
##   Sum     0.50000000 0.16666667 0.33333333 1.00000000

addmargins(prop.table(mytable, 1), 2)

##          Improved
## Treatment      None      Some    Marked       Sum
##   Placebo 0.6744186 0.1627907 0.1627907 1.0000000
##   Treated 0.3170732 0.1707317 0.5121951 1.0000000

addmargins(prop.table(mytable, 2), 1)

##          Improved
## Treatment      None      Some    Marked
##   Placebo 0.6904762 0.5000000 0.2500000
##   Treated 0.3095238 0.5000000 0.7500000
##   Sum     1.0000000 1.0000000 1.0000000

# Listing 7.10 - Two way table using CrossTable
library(gmodels)
CrossTable(Arthritis$Treatment, Arthritis$Improved)

## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  84 
## 
##  
##                     | Arthritis$Improved 
## Arthritis$Treatment |      None |      Some |    Marked | Row Total | 
## --------------------|-----------|-----------|-----------|-----------|
##             Placebo |        29 |         7 |         7 |        43 | 
##                     |     2.616 |     0.004 |     3.752 |           | 
##                     |     0.674 |     0.163 |     0.163 |     0.512 | 
##                     |     0.690 |     0.500 |     0.250 |           | 
##                     |     0.345 |     0.083 |     0.083 |           | 
## --------------------|-----------|-----------|-----------|-----------|
##             Treated |        13 |         7 |        21 |        41 | 
##                     |     2.744 |     0.004 |     3.935 |           | 
##                     |     0.317 |     0.171 |     0.512 |     0.488 | 
##                     |     0.310 |     0.500 |     0.750 |           | 
##                     |     0.155 |     0.083 |     0.250 |           | 
## --------------------|-----------|-----------|-----------|-----------|
##        Column Total |        42 |        14 |        28 |        84 | 
##                     |     0.500 |     0.167 |     0.333 |           | 
## --------------------|-----------|-----------|-----------|-----------|
## 
##

# Listing 7.11 - Three way table
mytable <- xtabs(~ Treatment+Sex+Improved, data=Arthritis)
mytable

## , , Improved = None
## 
##          Sex
## Treatment Female Male
##   Placebo     19   10
##   Treated      6    7
## 
## , , Improved = Some
## 
##          Sex
## Treatment Female Male
##   Placebo      7    0
##   Treated      5    2
## 
## , , Improved = Marked
## 
##          Sex
## Treatment Female Male
##   Placebo      6    1
##   Treated     16    5

ftable(mytable)

##                  Improved None Some Marked
## Treatment Sex                             
## Placebo   Female            19    7      6
##           Male              10    0      1
## Treated   Female             6    5     16
##           Male               7    2      5

margin.table(mytable, 1)

## Treatment
## Placebo Treated 
##      43      41

margin.table(mytable, 2)

## Sex
## Female   Male 
##     59     25

margin.table(mytable, 2)

## Sex
## Female   Male 
##     59     25

margin.table(mytable, c(1,3))

##          Improved
## Treatment None Some Marked
##   Placebo   29    7      7
##   Treated   13    7     21

ftable(prop.table(mytable, c(1,2)))

##                  Improved       None       Some     Marked
## Treatment Sex                                             
## Placebo   Female          0.59375000 0.21875000 0.18750000
##           Male            0.90909091 0.00000000 0.09090909
## Treated   Female          0.22222222 0.18518519 0.59259259
##           Male            0.50000000 0.14285714 0.35714286

ftable(addmargins(prop.table(mytable, c(1, 2)), 3))

##                  Improved       None       Some     Marked        Sum
## Treatment Sex                                                        
## Placebo   Female          0.59375000 0.21875000 0.18750000 1.00000000
##           Male            0.90909091 0.00000000 0.09090909 1.00000000
## Treated   Female          0.22222222 0.18518519 0.59259259 1.00000000
##           Male            0.50000000 0.14285714 0.35714286 1.00000000

# Listing 7.12 - Chi-square test of independence
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
chisq.test(mytable)

## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 13.055, df = 2, p-value = 0.001463

mytable <- xtabs(~Improved+Sex, data=Arthritis)
chisq.test(mytable)

## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 4.8407, df = 2, p-value = 0.08889

# Fisher's exact test
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
fisher.test(mytable)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  mytable
## p-value = 0.001393
## alternative hypothesis: two.sided

# Chochran-Mantel-Haenszel test
mytable <- xtabs(~Treatment+Improved+Sex, data=Arthritis)
mantelhaen.test(mytable)

## 
##  Cochran-Mantel-Haenszel test
## 
## data:  mytable
## Cochran-Mantel-Haenszel M^2 = 14.632, df = 2, p-value = 0.0006647

# Listing 7.13 - Measures of association for a two-way table
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
assocstats(mytable)

##                     X^2 df  P(> X^2)
## Likelihood Ratio 13.530  2 0.0011536
## Pearson          13.055  2 0.0014626
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.367 
## Cramer's V        : 0.394

# Listing 7.14 Covariances and correlations
states<- state.x77[,1:6]
cov(states)

##               Population      Income   Illiteracy     Life Exp      Murder
## Population 19931683.7588 571229.7796  292.8679592 -407.8424612 5663.523714
## Income       571229.7796 377573.3061 -163.7020408  280.6631837 -521.894286
## Illiteracy      292.8680   -163.7020    0.3715306   -0.4815122    1.581776
## Life Exp       -407.8425    280.6632   -0.4815122    1.8020204   -3.869480
## Murder         5663.5237   -521.8943    1.5817755   -3.8694804   13.627465
## HS Grad       -3551.5096   3076.7690   -3.2354694    6.3126849  -14.549616
##                 HS Grad
## Population -3551.509551
## Income      3076.768980
## Illiteracy    -3.235469
## Life Exp       6.312685
## Murder       -14.549616
## HS Grad       65.237894

cor(states)

##             Population     Income Illiteracy    Life Exp     Murder
## Population  1.00000000  0.2082276  0.1076224 -0.06805195  0.3436428
## Income      0.20822756  1.0000000 -0.4370752  0.34025534 -0.2300776
## Illiteracy  0.10762237 -0.4370752  1.0000000 -0.58847793  0.7029752
## Life Exp   -0.06805195  0.3402553 -0.5884779  1.00000000 -0.7808458
## Murder      0.34364275 -0.2300776  0.7029752 -0.78084575  1.0000000
## HS Grad    -0.09848975  0.6199323 -0.6571886  0.58221620 -0.4879710
##                HS Grad
## Population -0.09848975
## Income      0.61993232
## Illiteracy -0.65718861
## Life Exp    0.58221620
## Murder     -0.48797102
## HS Grad     1.00000000

cor(states, method="spearman")

##            Population     Income Illiteracy   Life Exp     Murder
## Population  1.0000000  0.1246098  0.3130496 -0.1040171  0.3457401
## Income      0.1246098  1.0000000 -0.3145948  0.3241050 -0.2174623
## Illiteracy  0.3130496 -0.3145948  1.0000000 -0.5553735  0.6723592
## Life Exp   -0.1040171  0.3241050 -0.5553735  1.0000000 -0.7802406
## Murder      0.3457401 -0.2174623  0.6723592 -0.7802406  1.0000000
## HS Grad    -0.3833649  0.5104809 -0.6545396  0.5239410 -0.4367330
##               HS Grad
## Population -0.3833649
## Income      0.5104809
## Illiteracy -0.6545396
## Life Exp    0.5239410
## Murder     -0.4367330
## HS Grad     1.0000000

x <- states[,c("Population", "Income", "Illiteracy", "HS Grad")]
y <- states[,c("Life Exp", "Murder")]
cor(x,y)

##               Life Exp     Murder
## Population -0.06805195  0.3436428
## Income      0.34025534 -0.2300776
## Illiteracy -0.58847793  0.7029752
## HS Grad     0.58221620 -0.4879710

# partial correlations
library(ggm)

## Loading required package: igraph

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

## 
## Attaching package: 'ggm'

## The following object is masked from 'package:igraph':
## 
##     pa

## The following object is masked from 'package:Hmisc':
## 
##     rcorr

# partial correlation of population and murder rate, controlling
# for income, illiteracy rate, and HS graduation rate
pcor(c(1,5,2,3,6), cov(states))

## [1] 0.3462724

# Listing 7.15 - Testing a correlation coefficient for significance
cor.test(states[,3], states[,5])

## 
##  Pearson's product-moment correlation
## 
## data:  states[, 3] and states[, 5]
## t = 6.8479, df = 48, p-value = 1.258e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5279280 0.8207295
## sample estimates:
##       cor 
## 0.7029752

# Listing 7.16 - Correlation matrix and tests of significance via corr.test
library(psych)
corr.test(states, use="complete")

## Call:corr.test(x = states, use = "complete")
## Correlation matrix 
##            Population Income Illiteracy Life Exp Murder HS Grad
## Population       1.00   0.21       0.11    -0.07   0.34   -0.10
## Income           0.21   1.00      -0.44     0.34  -0.23    0.62
## Illiteracy       0.11  -0.44       1.00    -0.59   0.70   -0.66
## Life Exp        -0.07   0.34      -0.59     1.00  -0.78    0.58
## Murder           0.34  -0.23       0.70    -0.78   1.00   -0.49
## HS Grad         -0.10   0.62      -0.66     0.58  -0.49    1.00
## Sample Size 
## [1] 50
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##            Population Income Illiteracy Life Exp Murder HS Grad
## Population       0.00   0.59       1.00      1.0   0.10       1
## Income           0.15   0.00       0.01      0.1   0.54       0
## Illiteracy       0.46   0.00       0.00      0.0   0.00       0
## Life Exp         0.64   0.02       0.00      0.0   0.00       0
## Murder           0.01   0.11       0.00      0.0   0.00       0
## HS Grad          0.50   0.00       0.00      0.0   0.00       0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

# t test
library(MASS)
t.test(Prob ~ So, data=UScrime)

## 
##  Welch Two Sample t-test
## 
## data:  Prob by So
## t = -3.8954, df = 24.925, p-value = 0.0006506
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03852569 -0.01187439
## sample estimates:
## mean in group 0 mean in group 1 
##      0.03851265      0.06371269

# dependent t test
sapply(UScrime[c("U1","U2")], function(x)(c(mean=mean(x),sd=sd(x))))

##            U1       U2
## mean 95.46809 33.97872
## sd   18.02878  8.44545

with(UScrime, t.test(U1, U2, paired=TRUE))

## 
##  Paired t-test
## 
## data:  U1 and U2
## t = 32.407, df = 46, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  57.67003 65.30870
## sample estimates:
## mean of the differences 
##                61.48936

# Wilcoxon two group comparison
with(UScrime, by(Prob, So, median))

## So: 0
## [1] 0.038201
## -------------------------------------------------------- 
## So: 1
## [1] 0.055552

wilcox.test(Prob ~ So, data=UScrime)

## 
##  Wilcoxon rank sum test
## 
## data:  Prob by So
## W = 81, p-value = 8.488e-05
## alternative hypothesis: true location shift is not equal to 0

sapply(UScrime[c("U1", "U2")], median)

## U1 U2 
## 92 34

with(UScrime, wilcox.test(U1, U2, paired=TRUE))

## Warning in wilcox.test.default(U1, U2, paired = TRUE): cannot compute exact
## p-value with ties

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  U1 and U2
## V = 1128, p-value = 2.464e-09
## alternative hypothesis: true location shift is not equal to 0

# Kruskal Wallis test
states <- data.frame(state.region, state.x77)
kruskal.test(Illiteracy ~ state.region, data=states)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  Illiteracy by state.region
## Kruskal-Wallis chi-squared = 22.672, df = 3, p-value = 4.726e-05

# Listing 7.17 - Nonparametric multiple comparisons
source("http://www.statmethods.net/RiA/wmc.txt")              
states <- data.frame(state.region, state.x77)
wmc(Illiteracy ~ state.region, data=states, method="holm")

## Descriptive Statistics
## 
##            West North Central Northeast    South
## n      13.00000      12.00000   9.00000 16.00000
## median  0.60000       0.70000   1.10000  1.75000
## mad     0.14826       0.14826   0.29652  0.59304
## 
## Multiple Comparisons (Wilcoxon Rank Sum Tests)
## Probability Adjustment = holm
## 
##         Group.1       Group.2    W            p    
## 1          West North Central 88.0 8.665618e-01    
## 2          West     Northeast 46.5 8.665618e-01    
## 3          West         South 39.0 1.788186e-02   *
## 4 North Central     Northeast 20.5 5.359707e-02   .
## 5 North Central         South  2.0 8.051509e-05 ***
## 6     Northeast         South 18.0 1.187644e-02   *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Untitled.R

jaehochang

Tue May 15 18:39:16 2018