# R in Action (2nd ed): Chapter 7
# Basic statistics
# requires packages npmc, ggm, gmodels, vcd, Hmisc,
# pastecs, psych, doBy to be installed
# install.packages(c("ggm", "gmodels", "vcd", "Hmisc",
# "pastecs", "psych", "doBy"))
mt <- mtcars[c("mpg", "hp", "wt", "am")]
head(mt)
## mpg hp wt am
## Mazda RX4 21.0 110 2.620 1
## Mazda RX4 Wag 21.0 110 2.875 1
## Datsun 710 22.8 93 2.320 1
## Hornet 4 Drive 21.4 110 3.215 0
## Hornet Sportabout 18.7 175 3.440 0
## Valiant 18.1 105 3.460 0
# Listing 7.1 - Descriptive stats via summary
mt <- mtcars[c("mpg", "hp", "wt", "am")]
summary(mt)
## mpg hp wt am
## Min. :10.40 Min. : 52.0 Min. :1.513 Min. :0.0000
## 1st Qu.:15.43 1st Qu.: 96.5 1st Qu.:2.581 1st Qu.:0.0000
## Median :19.20 Median :123.0 Median :3.325 Median :0.0000
## Mean :20.09 Mean :146.7 Mean :3.217 Mean :0.4062
## 3rd Qu.:22.80 3rd Qu.:180.0 3rd Qu.:3.610 3rd Qu.:1.0000
## Max. :33.90 Max. :335.0 Max. :5.424 Max. :1.0000
# Listing 7.2 - descriptive stats via sapply
mystats <- function(x, na.omit=FALSE){
if (na.omit)
x <- x[!is.na(x)]
m <- mean(x)
n <- length(x)
s <- sd(x)
skew <- sum((x-m)^3/s^3)/n
kurt <- sum((x-m)^4/s^4)/n - 3
return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
}
myvars <- c("mpg", "hp", "wt")
sapply(mtcars[myvars], mystats)
## mpg hp wt
## n 32.000000 32.0000000 32.00000000
## mean 20.090625 146.6875000 3.21725000
## stdev 6.026948 68.5628685 0.97845744
## skew 0.610655 0.7260237 0.42314646
## kurtosis -0.372766 -0.1355511 -0.02271075
# Listing 7.3 - Descriptive stats via describe (Hmisc)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars])
## mtcars[myvars]
##
## 3 Variables 32 Observations
## ---------------------------------------------------------------------------
## mpg
## n missing distinct Info Mean Gmd .05 .10
## 32 0 25 0.999 20.09 6.796 12.00 14.34
## .25 .50 .75 .90 .95
## 15.43 19.20 22.80 30.09 31.30
##
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9
## ---------------------------------------------------------------------------
## hp
## n missing distinct Info Mean Gmd .05 .10
## 32 0 22 0.997 146.7 77.04 63.65 66.00
## .25 .50 .75 .90 .95
## 96.50 123.00 180.00 243.50 253.55
##
## lowest : 52 62 65 66 91, highest: 215 230 245 264 335
## ---------------------------------------------------------------------------
## wt
## n missing distinct Info Mean Gmd .05 .10
## 32 0 29 0.999 3.217 1.089 1.736 1.956
## .25 .50 .75 .90 .95
## 2.581 3.325 3.610 4.048 5.293
##
## lowest : 1.513 1.615 1.835 1.935 2.140, highest: 3.845 4.070 5.250 5.345 5.424
## ---------------------------------------------------------------------------
# Listing 7.,4 - Descriptive stats via stat.desc (pastecs)
library(pastecs)
myvars <- c("mpg", "hp", "wt")
stat.desc(mtcars[myvars])
## mpg hp wt
## nbr.val 32.0000000 32.0000000 32.0000000
## nbr.null 0.0000000 0.0000000 0.0000000
## nbr.na 0.0000000 0.0000000 0.0000000
## min 10.4000000 52.0000000 1.5130000
## max 33.9000000 335.0000000 5.4240000
## range 23.5000000 283.0000000 3.9110000
## sum 642.9000000 4694.0000000 102.9520000
## median 19.2000000 123.0000000 3.3250000
## mean 20.0906250 146.6875000 3.2172500
## SE.mean 1.0654240 12.1203173 0.1729685
## CI.mean.0.95 2.1729465 24.7195501 0.3527715
## var 36.3241028 4700.8669355 0.9573790
## std.dev 6.0269481 68.5628685 0.9784574
## coef.var 0.2999881 0.4674077 0.3041285
# Listing 7.5 - Descriptive stats via describe (psych)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars])
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## hp 2 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## wt 3 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## kurtosis se
## mpg -0.37 1.07
## hp -0.14 12.12
## wt -0.02 0.17
# Listing 7.6 - Descriptive stats by group with aggregate
myvars <- c("mpg", "hp", "wt")
aggregate(mtcars[myvars], by=list(am=mtcars$am), mean)
## am mpg hp wt
## 1 0 17.14737 160.2632 3.768895
## 2 1 24.39231 126.8462 2.411000
aggregate(mtcars[myvars], by=list(am=mtcars$am), sd)
## am mpg hp wt
## 1 0 3.833966 53.90820 0.7774001
## 2 1 6.166504 84.06232 0.6169816
# Listing 7.7 - Descriptive stats by group via by
dstats <- function(x)sapply(x, mystats)
myvars <- c("mpg", "hp", "wt")
by(mtcars[myvars], mtcars$am, dstats)
## mtcars$am: 0
## mpg hp wt
## n 19.00000000 19.00000000 19.0000000
## mean 17.14736842 160.26315789 3.7688947
## stdev 3.83396639 53.90819573 0.7774001
## skew 0.01395038 -0.01422519 0.9759294
## kurtosis -0.80317826 -1.20969733 0.1415676
## --------------------------------------------------------
## mtcars$am: 1
## mpg hp wt
## n 13.00000000 13.0000000 13.0000000
## mean 24.39230769 126.8461538 2.4110000
## stdev 6.16650381 84.0623243 0.6169816
## skew 0.05256118 1.3598859 0.2103128
## kurtosis -1.45535200 0.5634635 -1.1737358
# Listing 7.8 - Descriptive stats by group via summaryBy
library(doBy)
summaryBy(mpg+hp+wt~am, data=mtcars, FUN=mystats)
## am mpg.n mpg.mean mpg.stdev mpg.skew mpg.kurtosis hp.n hp.mean
## 1 0 19 17.14737 3.833966 0.01395038 -0.8031783 19 160.2632
## 2 1 13 24.39231 6.166504 0.05256118 -1.4553520 13 126.8462
## hp.stdev hp.skew hp.kurtosis wt.n wt.mean wt.stdev wt.skew
## 1 53.90820 -0.01422519 -1.2096973 19 3.768895 0.7774001 0.9759294
## 2 84.06232 1.35988586 0.5634635 13 2.411000 0.6169816 0.2103128
## wt.kurtosis
## 1 0.1415676
## 2 -1.1737358
# Listing 7.9 - Descriptive stats by group via describe.by (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
describeBy(mtcars[myvars], list(am=mtcars$am))
##
## Descriptive statistics by group
## am: 0
## vars n mean sd median trimmed mad min max range skew
## mpg 1 19 17.15 3.83 17.30 17.12 3.11 10.40 24.40 14.00 0.01
## hp 2 19 160.26 53.91 175.00 161.06 77.10 62.00 245.00 183.00 -0.01
## wt 3 19 3.77 0.78 3.52 3.75 0.45 2.46 5.42 2.96 0.98
## kurtosis se
## mpg -0.80 0.88
## hp -1.21 12.37
## wt 0.14 0.18
## --------------------------------------------------------
## am: 1
## vars n mean sd median trimmed mad min max range skew
## mpg 1 13 24.39 6.17 22.80 24.38 6.67 15.00 33.90 18.90 0.05
## hp 2 13 126.85 84.06 109.00 114.73 63.75 52.00 335.00 283.00 1.36
## wt 3 13 2.41 0.62 2.32 2.39 0.68 1.51 3.57 2.06 0.21
## kurtosis se
## mpg -1.46 1.71
## hp 0.56 23.31
## wt -1.17 0.17
# summary statistics by group via the reshape package
library(reshape)
dstats <- function(x)(c(n=length(x), mean=mean(x), sd=sd(x)))
dfm <- melt(mtcars, measure.vars=c("mpg", "hp", "wt"),
id.vars=c("am", "cyl"))
cast(dfm, am + cyl + variable ~ ., dstats)
## am cyl variable n mean sd
## 1 0 4 mpg 3 22.900000 1.4525839
## 2 0 4 hp 3 84.666667 19.6553640
## 3 0 4 wt 3 2.935000 0.4075230
## 4 0 6 mpg 4 19.125000 1.6317169
## 5 0 6 hp 4 115.250000 9.1787799
## 6 0 6 wt 4 3.388750 0.1162164
## 7 0 8 mpg 12 15.050000 2.7743959
## 8 0 8 hp 12 194.166667 33.3598379
## 9 0 8 wt 12 4.104083 0.7683069
## 10 1 4 mpg 8 28.075000 4.4838599
## 11 1 4 hp 8 81.875000 22.6554156
## 12 1 4 wt 8 2.042250 0.4093485
## 13 1 6 mpg 3 20.566667 0.7505553
## 14 1 6 hp 3 131.666667 37.5277675
## 15 1 6 wt 3 2.755000 0.1281601
## 16 1 8 mpg 2 15.400000 0.5656854
## 17 1 8 hp 2 299.500000 50.2045815
## 18 1 8 wt 2 3.370000 0.2828427
# frequency tables
library(vcd)
## Loading required package: grid
head(Arthritis)
## ID Treatment Sex Age Improved
## 1 57 Treated Male 27 Some
## 2 46 Treated Male 29 None
## 3 77 Treated Male 30 None
## 4 17 Treated Male 32 Marked
## 5 36 Treated Male 46 Marked
## 6 23 Treated Male 58 Marked
# one way table
mytable <- with(Arthritis, table(Improved))
mytable # frequencies
## Improved
## None Some Marked
## 42 14 28
prop.table(mytable) # proportions
## Improved
## None Some Marked
## 0.5000000 0.1666667 0.3333333
prop.table(mytable)*100 # percentages
## Improved
## None Some Marked
## 50.00000 16.66667 33.33333
# two way table
mytable <- xtabs(~ Treatment+Improved, data=Arthritis)
mytable # frequencies
## Improved
## Treatment None Some Marked
## Placebo 29 7 7
## Treated 13 7 21
margin.table(mytable,1) #row sums
## Treatment
## Placebo Treated
## 43 41
margin.table(mytable, 2) # column sums
## Improved
## None Some Marked
## 42 14 28
prop.table(mytable) # cell proportions
## Improved
## Treatment None Some Marked
## Placebo 0.34523810 0.08333333 0.08333333
## Treated 0.15476190 0.08333333 0.25000000
prop.table(mytable, 1) # row proportions
## Improved
## Treatment None Some Marked
## Placebo 0.6744186 0.1627907 0.1627907
## Treated 0.3170732 0.1707317 0.5121951
prop.table(mytable, 2) # column proportions
## Improved
## Treatment None Some Marked
## Placebo 0.6904762 0.5000000 0.2500000
## Treated 0.3095238 0.5000000 0.7500000
addmargins(mytable) # add row and column sums to table
## Improved
## Treatment None Some Marked Sum
## Placebo 29 7 7 43
## Treated 13 7 21 41
## Sum 42 14 28 84
# more complex tables
addmargins(prop.table(mytable))
## Improved
## Treatment None Some Marked Sum
## Placebo 0.34523810 0.08333333 0.08333333 0.51190476
## Treated 0.15476190 0.08333333 0.25000000 0.48809524
## Sum 0.50000000 0.16666667 0.33333333 1.00000000
addmargins(prop.table(mytable, 1), 2)
## Improved
## Treatment None Some Marked Sum
## Placebo 0.6744186 0.1627907 0.1627907 1.0000000
## Treated 0.3170732 0.1707317 0.5121951 1.0000000
addmargins(prop.table(mytable, 2), 1)
## Improved
## Treatment None Some Marked
## Placebo 0.6904762 0.5000000 0.2500000
## Treated 0.3095238 0.5000000 0.7500000
## Sum 1.0000000 1.0000000 1.0000000
# Listing 7.10 - Two way table using CrossTable
library(gmodels)
CrossTable(Arthritis$Treatment, Arthritis$Improved)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 84
##
##
## | Arthritis$Improved
## Arthritis$Treatment | None | Some | Marked | Row Total |
## --------------------|-----------|-----------|-----------|-----------|
## Placebo | 29 | 7 | 7 | 43 |
## | 2.616 | 0.004 | 3.752 | |
## | 0.674 | 0.163 | 0.163 | 0.512 |
## | 0.690 | 0.500 | 0.250 | |
## | 0.345 | 0.083 | 0.083 | |
## --------------------|-----------|-----------|-----------|-----------|
## Treated | 13 | 7 | 21 | 41 |
## | 2.744 | 0.004 | 3.935 | |
## | 0.317 | 0.171 | 0.512 | 0.488 |
## | 0.310 | 0.500 | 0.750 | |
## | 0.155 | 0.083 | 0.250 | |
## --------------------|-----------|-----------|-----------|-----------|
## Column Total | 42 | 14 | 28 | 84 |
## | 0.500 | 0.167 | 0.333 | |
## --------------------|-----------|-----------|-----------|-----------|
##
##
# Listing 7.11 - Three way table
mytable <- xtabs(~ Treatment+Sex+Improved, data=Arthritis)
mytable
## , , Improved = None
##
## Sex
## Treatment Female Male
## Placebo 19 10
## Treated 6 7
##
## , , Improved = Some
##
## Sex
## Treatment Female Male
## Placebo 7 0
## Treated 5 2
##
## , , Improved = Marked
##
## Sex
## Treatment Female Male
## Placebo 6 1
## Treated 16 5
ftable(mytable)
## Improved None Some Marked
## Treatment Sex
## Placebo Female 19 7 6
## Male 10 0 1
## Treated Female 6 5 16
## Male 7 2 5
margin.table(mytable, 1)
## Treatment
## Placebo Treated
## 43 41
margin.table(mytable, 2)
## Sex
## Female Male
## 59 25
margin.table(mytable, 2)
## Sex
## Female Male
## 59 25
margin.table(mytable, c(1,3))
## Improved
## Treatment None Some Marked
## Placebo 29 7 7
## Treated 13 7 21
ftable(prop.table(mytable, c(1,2)))
## Improved None Some Marked
## Treatment Sex
## Placebo Female 0.59375000 0.21875000 0.18750000
## Male 0.90909091 0.00000000 0.09090909
## Treated Female 0.22222222 0.18518519 0.59259259
## Male 0.50000000 0.14285714 0.35714286
ftable(addmargins(prop.table(mytable, c(1, 2)), 3))
## Improved None Some Marked Sum
## Treatment Sex
## Placebo Female 0.59375000 0.21875000 0.18750000 1.00000000
## Male 0.90909091 0.00000000 0.09090909 1.00000000
## Treated Female 0.22222222 0.18518519 0.59259259 1.00000000
## Male 0.50000000 0.14285714 0.35714286 1.00000000
# Listing 7.12 - Chi-square test of independence
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
chisq.test(mytable)
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 13.055, df = 2, p-value = 0.001463
mytable <- xtabs(~Improved+Sex, data=Arthritis)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 4.8407, df = 2, p-value = 0.08889
# Fisher's exact test
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
fisher.test(mytable)
##
## Fisher's Exact Test for Count Data
##
## data: mytable
## p-value = 0.001393
## alternative hypothesis: two.sided
# Chochran-Mantel-Haenszel test
mytable <- xtabs(~Treatment+Improved+Sex, data=Arthritis)
mantelhaen.test(mytable)
##
## Cochran-Mantel-Haenszel test
##
## data: mytable
## Cochran-Mantel-Haenszel M^2 = 14.632, df = 2, p-value = 0.0006647
# Listing 7.13 - Measures of association for a two-way table
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
assocstats(mytable)
## X^2 df P(> X^2)
## Likelihood Ratio 13.530 2 0.0011536
## Pearson 13.055 2 0.0014626
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.367
## Cramer's V : 0.394
# Listing 7.14 Covariances and correlations
states<- state.x77[,1:6]
cov(states)
## Population Income Illiteracy Life Exp Murder
## Population 19931683.7588 571229.7796 292.8679592 -407.8424612 5663.523714
## Income 571229.7796 377573.3061 -163.7020408 280.6631837 -521.894286
## Illiteracy 292.8680 -163.7020 0.3715306 -0.4815122 1.581776
## Life Exp -407.8425 280.6632 -0.4815122 1.8020204 -3.869480
## Murder 5663.5237 -521.8943 1.5817755 -3.8694804 13.627465
## HS Grad -3551.5096 3076.7690 -3.2354694 6.3126849 -14.549616
## HS Grad
## Population -3551.509551
## Income 3076.768980
## Illiteracy -3.235469
## Life Exp 6.312685
## Murder -14.549616
## HS Grad 65.237894
cor(states)
## Population Income Illiteracy Life Exp Murder
## Population 1.00000000 0.2082276 0.1076224 -0.06805195 0.3436428
## Income 0.20822756 1.0000000 -0.4370752 0.34025534 -0.2300776
## Illiteracy 0.10762237 -0.4370752 1.0000000 -0.58847793 0.7029752
## Life Exp -0.06805195 0.3402553 -0.5884779 1.00000000 -0.7808458
## Murder 0.34364275 -0.2300776 0.7029752 -0.78084575 1.0000000
## HS Grad -0.09848975 0.6199323 -0.6571886 0.58221620 -0.4879710
## HS Grad
## Population -0.09848975
## Income 0.61993232
## Illiteracy -0.65718861
## Life Exp 0.58221620
## Murder -0.48797102
## HS Grad 1.00000000
cor(states, method="spearman")
## Population Income Illiteracy Life Exp Murder
## Population 1.0000000 0.1246098 0.3130496 -0.1040171 0.3457401
## Income 0.1246098 1.0000000 -0.3145948 0.3241050 -0.2174623
## Illiteracy 0.3130496 -0.3145948 1.0000000 -0.5553735 0.6723592
## Life Exp -0.1040171 0.3241050 -0.5553735 1.0000000 -0.7802406
## Murder 0.3457401 -0.2174623 0.6723592 -0.7802406 1.0000000
## HS Grad -0.3833649 0.5104809 -0.6545396 0.5239410 -0.4367330
## HS Grad
## Population -0.3833649
## Income 0.5104809
## Illiteracy -0.6545396
## Life Exp 0.5239410
## Murder -0.4367330
## HS Grad 1.0000000
x <- states[,c("Population", "Income", "Illiteracy", "HS Grad")]
y <- states[,c("Life Exp", "Murder")]
cor(x,y)
## Life Exp Murder
## Population -0.06805195 0.3436428
## Income 0.34025534 -0.2300776
## Illiteracy -0.58847793 0.7029752
## HS Grad 0.58221620 -0.4879710
# partial correlations
library(ggm)
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##
## Attaching package: 'ggm'
## The following object is masked from 'package:igraph':
##
## pa
## The following object is masked from 'package:Hmisc':
##
## rcorr
# partial correlation of population and murder rate, controlling
# for income, illiteracy rate, and HS graduation rate
pcor(c(1,5,2,3,6), cov(states))
## [1] 0.3462724
# Listing 7.15 - Testing a correlation coefficient for significance
cor.test(states[,3], states[,5])
##
## Pearson's product-moment correlation
##
## data: states[, 3] and states[, 5]
## t = 6.8479, df = 48, p-value = 1.258e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5279280 0.8207295
## sample estimates:
## cor
## 0.7029752
# Listing 7.16 - Correlation matrix and tests of significance via corr.test
library(psych)
corr.test(states, use="complete")
## Call:corr.test(x = states, use = "complete")
## Correlation matrix
## Population Income Illiteracy Life Exp Murder HS Grad
## Population 1.00 0.21 0.11 -0.07 0.34 -0.10
## Income 0.21 1.00 -0.44 0.34 -0.23 0.62
## Illiteracy 0.11 -0.44 1.00 -0.59 0.70 -0.66
## Life Exp -0.07 0.34 -0.59 1.00 -0.78 0.58
## Murder 0.34 -0.23 0.70 -0.78 1.00 -0.49
## HS Grad -0.10 0.62 -0.66 0.58 -0.49 1.00
## Sample Size
## [1] 50
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## Population Income Illiteracy Life Exp Murder HS Grad
## Population 0.00 0.59 1.00 1.0 0.10 1
## Income 0.15 0.00 0.01 0.1 0.54 0
## Illiteracy 0.46 0.00 0.00 0.0 0.00 0
## Life Exp 0.64 0.02 0.00 0.0 0.00 0
## Murder 0.01 0.11 0.00 0.0 0.00 0
## HS Grad 0.50 0.00 0.00 0.0 0.00 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# t test
library(MASS)
t.test(Prob ~ So, data=UScrime)
##
## Welch Two Sample t-test
##
## data: Prob by So
## t = -3.8954, df = 24.925, p-value = 0.0006506
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03852569 -0.01187439
## sample estimates:
## mean in group 0 mean in group 1
## 0.03851265 0.06371269
# dependent t test
sapply(UScrime[c("U1","U2")], function(x)(c(mean=mean(x),sd=sd(x))))
## U1 U2
## mean 95.46809 33.97872
## sd 18.02878 8.44545
with(UScrime, t.test(U1, U2, paired=TRUE))
##
## Paired t-test
##
## data: U1 and U2
## t = 32.407, df = 46, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 57.67003 65.30870
## sample estimates:
## mean of the differences
## 61.48936
# Wilcoxon two group comparison
with(UScrime, by(Prob, So, median))
## So: 0
## [1] 0.038201
## --------------------------------------------------------
## So: 1
## [1] 0.055552
wilcox.test(Prob ~ So, data=UScrime)
##
## Wilcoxon rank sum test
##
## data: Prob by So
## W = 81, p-value = 8.488e-05
## alternative hypothesis: true location shift is not equal to 0
sapply(UScrime[c("U1", "U2")], median)
## U1 U2
## 92 34
with(UScrime, wilcox.test(U1, U2, paired=TRUE))
## Warning in wilcox.test.default(U1, U2, paired = TRUE): cannot compute exact
## p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: U1 and U2
## V = 1128, p-value = 2.464e-09
## alternative hypothesis: true location shift is not equal to 0
# Kruskal Wallis test
states <- data.frame(state.region, state.x77)
kruskal.test(Illiteracy ~ state.region, data=states)
##
## Kruskal-Wallis rank sum test
##
## data: Illiteracy by state.region
## Kruskal-Wallis chi-squared = 22.672, df = 3, p-value = 4.726e-05
# Listing 7.17 - Nonparametric multiple comparisons
source("http://www.statmethods.net/RiA/wmc.txt")
states <- data.frame(state.region, state.x77)
wmc(Illiteracy ~ state.region, data=states, method="holm")
## Descriptive Statistics
##
## West North Central Northeast South
## n 13.00000 12.00000 9.00000 16.00000
## median 0.60000 0.70000 1.10000 1.75000
## mad 0.14826 0.14826 0.29652 0.59304
##
## Multiple Comparisons (Wilcoxon Rank Sum Tests)
## Probability Adjustment = holm
##
## Group.1 Group.2 W p
## 1 West North Central 88.0 8.665618e-01
## 2 West Northeast 46.5 8.665618e-01
## 3 West South 39.0 1.788186e-02 *
## 4 North Central Northeast 20.5 5.359707e-02 .
## 5 North Central South 2.0 8.051509e-05 ***
## 6 Northeast South 18.0 1.187644e-02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1