kabacoff

# Analisis of Variance Robert Kabacoff R-in-Action chapter 9
library(rrcov);library(HH);library(multcomp);library(MASS);library(mvoutlier)

## Loading required package: robustbase

## Scalable Robust Estimators with High Breakdown Point (version 1.3-8)

## Loading required package: lattice

## Loading required package: grid

## Loading required package: latticeExtra

## Loading required package: RColorBrewer

## Loading required package: multcomp

## Loading required package: mvtnorm

## Loading required package: survival

## 
## Attaching package: 'survival'

## The following object is masked from 'package:robustbase':
## 
##     heart

## Loading required package: TH.data

## Loading required package: gridExtra

## Loading required package: sgeostat

## sROC 0.1-2 loaded

# library(car)
# objects(grep("car",search()))
# ?Anova
# One-Way Anova
# In a one-way ANOVA, you're interested in comparing the dependent variable means of
# two or more groups defined by a categorical grouping factor. This example comes
# from the cholesterol dataset in the multcomp package, taken from Westfall, Tobia,
# Rom, & Hochberg (1999). Fifty patients received one of five cholesterol-reducing
# drug regimens (trt). Three of the treatment conditions involved the same drug
# administered as 20 mg once per day (1time), 10mg twice per day (2times), or 5 mg
# four times per day (4times). The two remaining conditions (drugD and drugE) represented
# competing drugs. Which drug regimen produced the greatest cholesterol
# reduction (response)? The analysis is provided in the following listing.
library(multcomp)
attach(cholesterol)
names(cholesterol)

## [1] "trt"      "response"

head(cholesterol)

##     trt response
## 1 1time   3.8612
## 2 1time  10.3868
## 3 1time   5.9059
## 4 1time   3.0609
## 5 1time   7.7204
## 6 1time   2.7139

table(trt)

## trt
##  1time 2times 4times  drugD  drugE 
##     10     10     10     10     10

aggregate(response,by=list(trt),FUN=mean)# group means

##   Group.1        x
## 1   1time  5.78197
## 2  2times  9.22497
## 3  4times 12.37478
## 4   drugD 15.36117
## 5   drugE 20.94752

aggregate(response,by=list(trt),FUN=sd)# group standard devediation

##   Group.1        x
## 1   1time 2.878113
## 2  2times 3.483054
## 3  4times 2.923119
## 4   drugD 3.454636
## 5   drugE 3.345003

fit <- aov(response~trt)
summary(fit)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## trt          4 1351.4   337.8   32.43 9.82e-13 ***
## Residuals   45  468.8    10.4                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(gplots)

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:HH':
## 
##     residplot

## The following object is masked from 'package:stats':
## 
##     lowess

plotmeans(response~trt,xlab = "Treatment",ylab = "Response",
          main="Mean Plot \nwith 95% CI")

detach(cholesterol)
# Looking at the output, you can
# see that 10 patients received each
# of the drug regimens 1. From
# the means, it appears that drugE
# produced the greatest cholesterol
# reduction, whereas 1time produced
# the least 2. Standard deviations
# were relatively constant
# across the five groups, ranging
# from 2.88 to 3.48 3. The ANOVA
# F test for treatment (trt) is significant
# (p < .0001), providing evidence
# that the five treatments
# aren't all equally effective 4.
# The plotmeans() function in
# the gplots package can be used
# to produce a graph of group
# means and their confidence
# intervals 5. A plot of the treatment
# means, with 95% confidence limits, is provided in the figure and allows you to
# clearly see these treatment differences.

# 9.3.1 Multiple comparisons
# Listing 9.2 Tukey HSD pairwise group comparisons
TukeyHSD(fit)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = response ~ trt)
## 
## $trt
##                   diff        lwr       upr     p adj
## 2times-1time   3.44300 -0.6582817  7.544282 0.1380949
## 4times-1time   6.59281  2.4915283 10.694092 0.0003542
## drugD-1time    9.57920  5.4779183 13.680482 0.0000003
## drugE-1time   15.16555 11.0642683 19.266832 0.0000000
## 4times-2times  3.14981 -0.9514717  7.251092 0.2050382
## drugD-2times   6.13620  2.0349183 10.237482 0.0009611
## drugE-2times  11.72255  7.6212683 15.823832 0.0000000
## drugD-4times   2.98639 -1.1148917  7.087672 0.2512446
## drugE-4times   8.57274  4.4714583 12.674022 0.0000037
## drugE-drugD    5.58635  1.4850683  9.687632 0.0030633

par(las=2)
par(mar=c(5,8,4,2))
plot(TukeyHSD(fit))

# In this graph, confidence intervals that
# include 0 indicate treatments that aren't significantly different (p > 0.5).
library(multcomp)
par(mar=c(5,4,6,2))
tuk <- glht(fit,linfct=mcp(trt="Tukey"))
plot(cld(tuk,level = 0.05),col="lightgrey")

# In this code, the par statement increases the top margin to fit the letter array. The
# level option in the cld() function provides the significance level to use (0.05, or
# 95% confidence in this case).
# Groups (represented by box plots) that have the same letter don't have significantly
# different means. You can see that 1time and 2times aren't significantly different (they
# both have the letter a) and that 2times and 4times aren't significantly different (they both have the letter
# b);but that 1time and 4times are different (they don't share a letter).
# Personally, I find figure 9.3 easier to read than figure 9.2. It also has the advantage
# of providing information on the distribution of scores within each group.
# From these results, you can see that taking the cholesterol-lowering drug in 5 mg
# doses four times a day was better than taking a 20 mg dose once per day. The competitor
# drugD wasn't superior to this four-times-per-day regimen. But competitor drugE
# was superior to both drugD and all three dosage strategies for the focus drug.

# 9.3.2 Assessing test assumptions
library(car)

## 
## Attaching package: 'car'

## The following objects are masked from 'package:HH':
## 
##     logit, vif

# Note the qqPlot() requires an lm() fit. The graph is provided in figure 9.4. The data
# falls within the 95% confidence envelope, suggesting that the normality assumption
# has been met fairly well
qqPlot(lm(response~trt,data = cholesterol),
       simulate=TRUE,main="QQplot",labels=FALSE)

bartlett.test(response~trt,data = cholesterol)#Bartlett's test indicates that the variances

## 
##  Bartlett test of homogeneity of variances
## 
## data:  response by trt
## Bartlett's K-squared = 0.57975, df = 4, p-value = 0.9653

# in the five groups don't differ significantly (p = 0.97).
library(car)
outlierTest(fit)

## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 19 2.251149           0.029422           NA

# From the output, you can see that there's no indication of outliers in the cholesterol
# data (NA occurs when p > 1). Taking the Q-Q plot, Bartlett's test, and outlier test
# together, the data appear to fit the ANOVA model quite well. This, in turn, adds to
# your confidence in the results.

kabacoff_9.R

aruac

Fri May 20 15:52:10 2016