Chapter8 方差分析

0.1 单因子方差分析
0.2 方差分析假定条件检验
- 0.2.1 正态性检验：Q-Q图、Shapiro-Wilk检验、K-S检验
- 0.2.2 方差齐性检验
0.3 单因子方差分析的非参数方法
- 0.3.1 Kruskal-Wallis检验；检验多个总体是否相同

0.1 单因子方差分析

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_1.RData")
example8_1=cbind(example8_1,id=factor(1:10))#转为长格式数据
library(reshape)#reshape2包里似乎没有rename这一函数
                #下面的example8_2_1与提供的example8_2应该相同
example8_2_1=melt(example8_1,id.vars=c('id'),variable_name='品种')
example8_2_1=rename(example8_2_1,c(id='地块',value='产量'))
save(example8_2_1,file='D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_2_1.RData')
head(example8_2_1)

##   地块  品种 产量
## 1    1 品种1   81
## 2    2 品种1   82
## 3    3 品种1   79
## 4    4 品种1   81
## 5    5 品种1   78
## 6    6 品种1   89

0.1.1 样本数据的描述性统计:箱线图、描述统计量

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_2.RData")
boxplot(example8_2$产量~example8_2$品种,col='gold',main='',ylab='产量',xlab='品种')

0.1.2 单因子方差分析

\[H_0:\alpha_1=\alpha_2=\alpha_3=0（品种对产量的影响不显著） \quad v.s. \quad H_1:\alpha_1、\alpha_2、\alpha_3至少有一个不等于0（品种对产量的影响显著）\]

attach(example8_2)
my_summary=function(x){
  with(x,data.frame('均值'=mean(产量),'标准差'=sd(产量),n=length(产量)))
  #写函数的时候不要加 example8_2$ ，否则结果是错的,故需要attach声明变量。
}
library(plyr)

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:reshape':
## 
##     rename, round_any

ddply(example8_2,.(品种),my_summary)

##    品种 均值   标准差  n
## 1 品种1   84 4.546061 10
## 2 品种2   74 4.447221 10
## 3 品种3   82 5.270463 10

detach(example8_2)

0.1.3 单因子方差分析:方差分析表、方差分析模型参数估计

model_1w=aov(example8_2$产量~example8_2$品种)
summary(model_1w)

##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## example8_2$品种  2    560  280.00   12.31 0.000158 ***
## Residuals       27    614   22.74                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model_1w$coefficients#方差分析的参数估计

##          (Intercept) example8_2$品种品种2 example8_2$品种品种3 
##                   84                  -10                   -2

  #绘制均值图观察各样本之间的差异
library(gplots)

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

plotmeans(example8_2$产量~example8_2$品种)

0.1.4 效应量分析

\[\eta^2 =\frac{SSA}{SST}\]

library(DescTools)

## Registered S3 method overwritten by 'DescTools':
##   method         from  
##   reorder.factor gplots

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:gplots':
## 
##     reorder.factor

EtaSq(model_1w,anova = T)

##                    eta.sq eta.sq.part  SS df        MS       F            p
## example8_2$品种 0.4770017   0.4770017 560  2 280.00000 12.3127 0.0001583995
## Residuals       0.5229983          NA 614 27  22.74074      NA           NA

0.1.5 方差分析的多重比较

0.1.5.1 Fisher的LSD方法：最小显著差异

\[H_0:\mu_i=\mu_j \quad v.s. \quad H_1:\mu_i \neq \mu_j,\quad i,j=1,2,3\ \& \ i \neq j\]

library(agricolae)
LSD=LSD.test(model_1w,'example8_2$品种')
LSD

## $statistics
##    MSerror Df Mean       CV  t.value      LSD
##   22.74074 27   80 5.960907 2.051831 4.375813
## 
## $parameters
##         test p.ajusted          name.t ntr alpha
##   Fisher-LSD      none example8_2$品种   3  0.05
## 
## $means
##       example8_2$产量      std  r      LCL      UCL Min Max   Q25  Q50   Q75
## 品种1              84 4.546061 10 80.90583 87.09417  78  92 81.00 83.5 86.75
## 品种2              74 4.447221 10 70.90583 77.09417  66  81 72.00 72.5 77.00
## 品种3              82 5.270463 10 78.90583 85.09417  76  89 77.25 81.5 87.00
## 
## $comparison
## NULL
## 
## $groups
##       example8_2$产量 groups
## 品种1              84      a
## 品种3              82      a
## 品种2              74      b
## 
## attr(,"class")
## [1] "group"

      #输出更多信息
library(DescTools)
PostHocTest(model_1w,method = 'lsd')

## 
##   Posthoc multiple comparisons of means : Fisher LSD 
##     95% family-wise confidence level
## 
## $`example8_2$品种`
##             diff     lwr.ci    upr.ci    pval    
## 品种2-品种1  -10 -14.375813 -5.624187   7e-05 ***
## 品种3-品种1   -2  -6.375813  2.375813 0.35666    
## 品种3-品种2    8   3.624187 12.375813 0.00085 ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.1.5.2 Tukey-Kramer的HSD方法：真实显著差异

\[H_0:\mu_i=\mu_j \quad v.s. \quad H_1:\mu_i \neq \mu_j,\quad i,j=1,2,3\ \& \ i \neq j\]

TukeyHSD(model_1w)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = example8_2$产量 ~ example8_2$品种)
## 
## $`example8_2$品种`
##             diff        lwr       upr     p adj
## 品种2-品种1  -10 -15.287702 -4.712298 0.0002017
## 品种3-品种1   -2  -7.287702  3.287702 0.6215828
## 品种3-品种2    8   2.712298 13.287702 0.0023770

plot(TukeyHSD(model_1w))#绘制配对差值置信区间的比较图

    #输出其他信息
library(agricolae)
HSD=HSD.test(model_1w,'example8_2$品种')
HSD

## $statistics
##    MSerror Df Mean       CV      MSD
##   22.74074 27   80 5.960907 5.287702
## 
## $parameters
##    test          name.t ntr StudentizedRange alpha
##   Tukey example8_2$品种   3         3.506426  0.05
## 
## $means
##       example8_2$产量      std  r Min Max   Q25  Q50   Q75
## 品种1              84 4.546061 10  78  92 81.00 83.5 86.75
## 品种2              74 4.447221 10  66  81 72.00 72.5 77.00
## 品种3              82 5.270463 10  76  89 77.25 81.5 87.00
## 
## $comparison
## NULL
## 
## $groups
##       example8_2$产量 groups
## 品种1              84      a
## 品种3              82      a
## 品种2              74      b
## 
## attr(,"class")
## [1] "group"

0.2 方差分析假定条件检验

0.2.1 正态性检验：Q-Q图、Shapiro-Wilk检验、K-S检验

0.2.1.1 Q-Q图

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_1.RData")
par(mfrow=c(1,3),cex=0.6,mai=c(0.5,0.5,0.2,0.1))
qqnorm(example8_1$品种1,xlab='期望正态值',ylab='观察值',datax=TRUE,main='品种1的Q-Q图')
qqline(example8_1$品种1,datax=TRUE)
qqnorm(example8_1$品种2,xlab='期望正态值',ylab='观察值',datax=TRUE,main='品种2的Q-Q图')
qqline(example8_1$品种2,datax=TRUE)
qqnorm(example8_1$品种3,xlab='期望正态值',ylab='观察值',datax=TRUE,main='品种3的Q-Q图')
qqline(example8_1$品种3,datax=TRUE)

     #绘制3个品种产量数据合并后的正态Q-Q图
load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_2.RData")
par(cex=0.8,mai=c(0.7,0.7,0.1,0.1))
qqnorm(example8_2$产量,xlab='期望正态值',ylab='观察值',datax=TRUE,main='')
qqline(example8_2$产量,datax=TRUE,col='red',lwd=2)
par(fig=c(0.08,0.5,0.5,0.98),new=TRUE)
hist(example8_2$产量,xlab='产量',ylab='',freq = FALSE,col='lightblue',cex.axis=0.7,cex.lab=0.7,main='')
lines(density(example8_2$产量),col='red',lwd=2)
box()

0.2.2 方差齐性检验

0.2.2.1 图示法：残差图、标准化残差的Q-Q图

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_2.RData")
model_1w=aov(example8_2$产量~example8_2$品种)
par(mfrow=c(1,2),mai=c(0.5,0.5,0.2,0.1),cex=0.6,cex.main=0.7)
plot(model_1w,which=c(1,2))

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_5.RData")
model_2wi=aov(example8_5$产量~example8_5$品种+example8_5$施肥方式+example8_5$品种:example8_5$施肥方式)
par(mfrow=c(1,2),mai=c(0.5,0.5,0.2,0.1),cex=0.6,cex.main=0.7)
plot(model_2wi,which=1:2)

0.2.2.2 Bartlett方差齐性检验：所有总体近似服从正态分布

\[H_0:\sigma_1^2=\sigma_2^2=...=\sigma_I^2 \quad v.s. \quad H_1:\sigma_1^2、\sigma_2^2、...、\sigma_I^2至少两个不同\]

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_5.RData")
bartlett.test(example8_5$产量~example8_5$品种)#不同品种产量的Bartlett检验

## 
##  Bartlett test of homogeneity of variances
## 
## data:  example8_5$产量 by example8_5$品种
## Bartlett's K-squared = 0.30152, df = 2, p-value = 0.8601

bartlett.test(example8_5$产量~example8_5$施肥方式)#不同施肥方式产量的Bartlett检验

## 
##  Bartlett test of homogeneity of variances
## 
## data:  example8_5$产量 by example8_5$施肥方式
## Bartlett's K-squared = 0.42431, df = 1, p-value = 0.5148

0.2.2.3 Levene方差齐性检验：所有总体均不服从正态分布

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_5.RData")
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:DescTools':
## 
##     Recode

leveneTest(example8_5$产量~example8_5$品种)#不同品种产量的Levene检验

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.9292 0.4071
##       27

leveneTest(example8_5$产量~example8_5$施肥方式)#不同施肥方式产量的Levene检验

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.2323 0.6336
##       28

0.3 单因子方差分析的非参数方法

0.3.1 Kruskal-Wallis检验；检验多个总体是否相同

\[H_0:所有总体都相同 \quad v.s. \quad H_1:并非所有总体都相同\]

load("D:\\New_Folder\\Study_Programming\\R_Programme\\Applied Statistics\\datas - Copy\\example\\ch8\\example8_2.RData")
kruskal.test(example8_2$产量~example8_2$品种)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  example8_2$产量 by example8_2$品种
## Kruskal-Wallis chi-squared = 13.532, df = 2, p-value = 0.001152