STATISTIČKI ALATI: FAKTORSKA ANOVA

Hrvatski studiji

dr.sc. Luka Šikić

13 siječanj, 2020

CILJEVI PREDAVANJA

FAKTORSKA ANOVA (bez interakcije)

load(file.path("clinicaltrial.Rdata")) # Učitaj podatke
xtabs( ~ drug + therapy, data = clin.trial ) # Tabuliraj
##           therapy
## drug       no.therapy CBT
##   placebo           3   3
##   anxifree          3   3
##   joyzepam          3   3
aggregate( mood.gain ~ drug + therapy, data = clin.trial, mean ) # Svi faktori
##       drug    therapy mood.gain
## 1  placebo no.therapy  0.300000
## 2 anxifree no.therapy  0.400000
## 3 joyzepam no.therapy  1.466667
## 4  placebo        CBT  0.600000
## 5 anxifree        CBT  1.033333
## 6 joyzepam        CBT  1.500000
aggregate( mood.gain ~ drug, clin.trial, mean ) # Samo droga
##       drug mood.gain
## 1  placebo 0.4500000
## 2 anxifree 0.7166667
## 3 joyzepam 1.4833333
aggregate( mood.gain ~ therapy, clin.trial, mean ) #Samo terapija
##      therapy mood.gain
## 1 no.therapy 0.7222222
## 2        CBT 1.0444444
mean( clin.trial$mood.gain ) # Pogledaj prosjek prediktora
## [1] 0.8833333
no therapy CBT total
NA no therapy CBT total
placebo 0.30 0.60 0.45
anxifree 0.40 1.03 0.72
joyzepam 1.47 1.50 1.48
total 0.72 1.04 0.88
Nulta hipoteza \(H_0\): prosjeci redova su jednaki i.e. \(\mu_{1.} = \mu_{2.} = \mu_{3.}\)
Alternativna hipoteza \(H_1\): barem jedan prosjek je različit.
Nulta hipoteza \(H_0\): prosjeci kolona sujednaki, i.e., \(\mu_{.1} = \mu_{.2}\)
Alternativna hipoteza \(H_1\): prosjeci kolona su različiti, i.e., \(\mu_{.1} \neq \mu_{.2}\)
model.1 <- aov( mood.gain ~ drug, clin.trial )  # Provedi test i spremi rezultate
summary( model.1 )  # Pregledaj rezultate
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## drug         2  3.453  1.7267   18.61 8.65e-05 ***
## Residuals   15  1.392  0.0928                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.2 <- aov( mood.gain ~ drug + therapy, clin.trial )   # Testiraj prošireni model 
summary(model.2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## drug         2  3.453  1.7267  26.149 1.87e-05 ***
## therapy      1  0.467  0.4672   7.076   0.0187 *  
## Residuals   14  0.924  0.0660                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\[ \mbox{MS} = \frac{\mbox{SS}}{df} \] \[ F_{A} = \frac{\mbox{MS}_{A}}{\mbox{MS}_{R}} \]

# Provedi F test
pf( q=26.15, df1=2, df2=14, lower.tail=FALSE )
## [1] 1.871981e-05

\[ \mbox{SS}_{A} = (N \times C) \sum_{r=1}^R \left( \bar{Y}_{r.} - \bar{Y}_{..} \right)^2 \]

\[ \mbox{SS}_{B} = (N \times R) \sum_{c=1}^C \left( \bar{Y}_{.c} - \bar{Y}_{..} \right)^2 \]

drug.means <- aggregate( mood.gain ~ drug, clin.trial, mean )[,2] # Prosjek za droge
therapy.means <- aggregate( mood.gain ~ therapy, clin.trial, mean )[,2] # Prosjek za terapiju
grand.mean <- mean( clin.trial$mood.gain ) # Grand prosjek

# Izračunaj kvadrate odstupanja za drogu

SS.drug <- (3*2) * sum( (drug.means - grand.mean)^2 )
SS.drug
## [1] 3.453333
# Izračunaj kavdrate odstupanja za terapiju

SS.therapy <- (3*3) * sum( (therapy.means - grand.mean)^2 )
SS.therapy
## [1] 0.4672222

\[ \mbox{SS}_T = \sum_{r=1}^R \sum_{c=1}^C \sum_{i=1}^N \left( Y_{rci} - \bar{Y}_{..}\right)^2 \]

\[ \mbox{SS}_R = \mbox{SS}_T - (\mbox{SS}_A + \mbox{SS}_B) \]

# Izračunaj ukupnu sumu kvadrata odstupanja
SS.tot <- sum( (clin.trial$mood.gain - grand.mean)^2 )
SS.tot
## [1] 4.845
# Izračunaj ukupna rezidualna odstupanja
SS.res <- SS.tot - (SS.drug + SS.therapy)
SS.res
## [1] 0.9244444

FAKTORSKA ANOVA (sa interakcijama)

library(sciplot)
library(lsr)
 lineplot.CI( x.factor = clin.trial$drug, 
              response = clin.trial$mood.gain,
              group = clin.trial$therapy,
              ci.fun = ciMean,
              xlab = "drug",
              ylab = "mood gain" )

\[ \alpha_r = \mu_{r.} - \mu_{..} \]

\[ \beta_c = \mu_{c.} - \mu_{..} \]

\[ \mu_{rc} = \mu_{..} + \alpha_r + \beta_c \] - Ukoliko ima interakcije

\[ \mu_{rc} \neq \mu_{..} + \alpha_r + \beta_c \]

\[ \hat{\alpha}_r = \bar{Y}_{r.} - \bar{Y}_{..} \]

\[ \hat{\beta}_c = \bar{Y}_{.c} - \bar{Y}_{..} \]

\[\begin{eqnarray*} (\alpha \beta)_{rc} &=& \mu_{rc} - \mu_{..} - \alpha_r - \beta_c \\ &=& \mu_{rc} - \mu_{..} - (\mu_{r.} - \mu_{..}) - (\mu_{.c} - \mu_{..}) \\ &=& \mu_{rc} - \mu_{r.} - \mu_{.c} + \mu_{..} \end{eqnarray*}\]

\[ \hat{(\alpha\beta)}_{rc} = \bar{Y}_{rc} - \bar{Y}_{r.} - \bar{Y}_{.c} + \bar{Y}_{..} \]

\[ \mbox{SS}_{A:B} = N \sum_{r=1}^R \sum_{c=1}^C \left( \bar{Y}_{rc} - \bar{Y}_{r.} - \bar{Y}_{.c} + \bar{Y}_{..} \right)^2 \]

\[ \mbox{SS}_R = \mbox{SS}_T - (\mbox{SS}_A + \mbox{SS}_B + \mbox{SS}_{A:B}) \]

\[\begin{eqnarray*} df_{A:B} &=& (R\times C - 1) - (R - 1) - (C -1 ) \\ &=& RC - R - C + 1 \\ &=& (R-1)(C-1) \end{eqnarray*}\]

# Definiraj model
 model.3 <- aov( mood.gain ~ drug + therapy + drug:therapy, clin.trial )
 model.3 <- aov( mood.gain ~ drug * therapy, clin.trial ) # Altenativni način
 summary( model.3 ) # Pregledaj rezultate modela
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## drug          2  3.453  1.7267  31.714 1.62e-05 ***
## therapy       1  0.467  0.4672   8.582   0.0126 *  
## drug:therapy  2  0.271  0.1356   2.490   0.1246    
## Residuals    12  0.653  0.0544                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

EFEKT VELIČINE

\[ \eta_A^2 = \frac{\mbox{SS}_{A}}{\mbox{SS}_{T}} \] \[ \mbox{partial } \eta^2_A = \frac{\mbox{SS}_{A}}{\mbox{SS}_{A} + \mbox{SS}_{R}} \]

# Izračunaj u R
 etaSquared( model.2 )
##            eta.sq eta.sq.part
## drug    0.7127623   0.7888325
## therapy 0.0964339   0.3357285
# Izračunaj u R za model sa interakcijama
 etaSquared( model.3 )
##                  eta.sq eta.sq.part
## drug         0.71276230   0.8409091
## therapy      0.09643390   0.4169559
## drug:therapy 0.05595689   0.2932692

PROVJERA PRETPOSTAVKI

# Provjeri rezidualnu strukturu
plot(model.2,1)

plot(model.3,1)

library(car) # Učitaj paket
## Loading required package: carData
# Provedi test
 leveneTest( model.3 )
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  0.0955 0.9912
##       12
 leveneTest( mood.gain ~ drug * therapy, clin.trial )
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  0.0955 0.9912
##       12
 resid <- residuals( model.2 )  # Spremi reziduale u objekt
 hist( resid )                  # Napravi histogram reziduala

 qqnorm( resid )                # QQ plot

 shapiro.test( resid )          # Provedi Shapiro-Wilk test
## 
##  Shapiro-Wilk normality test
## 
## data:  resid
## W = 0.95635, p-value = 0.5329

USPOREDBA MODELA F TESTOM

Hipoteza Ispravni model? R formula za ispravni model
Null \(M0\) Y ~ A + B
Alternative \(M1\) Y ~ A + B + C + D

\[ \mbox{SS}_{T} = \mbox{SS}_{M} + \mbox{SS}_{R} \]

\[ \mbox{SS}_{M} = \mbox{SS}_{T} - \mbox{SS}_{R} \] - Kvadrati odstupanja za prvi model

\[ \mbox{SS}_{M0} = \mbox{SS}_{T} - \mbox{SS}_{R0} \]

Kvadrati odstupanja za drugi model

\[ \mbox{SS}_{M1} = \mbox{SS}_{T} - \mbox{SS}_{R1} \]

\[ \begin{array} \mbox{SS}_{\Delta} &=& \mbox{SS}_{M1} - \mbox{SS}_{M0}\\ &=& (\mbox{SS}_{T} - \mbox{SS}_{R1}) - (\mbox{SS}_{T} - \mbox{SS}_{R0} ) \\ &=& \mbox{SS}_{R0} - \mbox{SS}_{R1} \end{array} \]

\[ \begin{array} \mbox{MS}_{\Delta} &=& \frac{\mbox{SS}_{\Delta} }{ \mbox{df}_{\Delta} } \\ \mbox{MS}_{R1} &=& \frac{ \mbox{SS}_{R1} }{ \mbox{df}_{R1} }\\ \end{array} \]

\[ F = \frac\mbox{MS}_{\Delta}\mbox{MS}_{R1} \]

# Usporedi modele u okiru ANOVA modela
 anova( model.1, model.3 )
## Analysis of Variance Table
## 
## Model 1: mood.gain ~ drug
## Model 2: mood.gain ~ drug * therapy
##   Res.Df     RSS Df Sum of Sq      F  Pr(>F)  
## 1     15 1.39167                              
## 2     12 0.65333  3   0.73833 4.5204 0.02424 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Spremi rezultate u objet
 ss.res.null <- 1.392
 ss.res.full <- 0.653
# Izračunaj razliku kvadrata odstupanja 
 ss.diff <- ss.res.null - ss.res.full 
 ss.diff
## [1] 0.739
# Korigigiraj za stupnjeve slobode
  ms.res <- ss.res.full / 12
  ms.diff <- ss.diff / 3 
# Izračunaj F statistiku
   F.stat <- ms.diff / ms.res 
  F.stat
## [1] 4.526799

POST-HOC TESTOVI

# Provedi test za model 2
 TukeyHSD( model.2 )
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mood.gain ~ drug + therapy, data = clin.trial)
## 
## $drug
##                        diff        lwr       upr     p adj
## anxifree-placebo  0.2666667 -0.1216321 0.6549655 0.2062942
## joyzepam-placebo  1.0333333  0.6450345 1.4216321 0.0000186
## joyzepam-anxifree 0.7666667  0.3783679 1.1549655 0.0003934
## 
## $therapy
##                     diff       lwr       upr     p adj
## CBT-no.therapy 0.3222222 0.0624132 0.5820312 0.0186602
# Provedi test za model 2
 TukeyHSD(model.3)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mood.gain ~ drug * therapy, data = clin.trial)
## 
## $drug
##                        diff         lwr       upr     p adj
## anxifree-placebo  0.2666667 -0.09273475 0.6260681 0.1597148
## joyzepam-placebo  1.0333333  0.67393191 1.3927348 0.0000160
## joyzepam-anxifree 0.7666667  0.40726525 1.1260681 0.0002740
## 
## $therapy
##                     diff        lwr       upr    p adj
## CBT-no.therapy 0.3222222 0.08256504 0.5618794 0.012617
## 
## $`drug:therapy`
##                                                diff          lwr
## anxifree:no.therapy-placebo:no.therapy   0.10000000 -0.539927728
## joyzepam:no.therapy-placebo:no.therapy   1.16666667  0.526738939
## placebo:CBT-placebo:no.therapy           0.30000000 -0.339927728
## anxifree:CBT-placebo:no.therapy          0.73333333  0.093405606
## joyzepam:CBT-placebo:no.therapy          1.20000000  0.560072272
## joyzepam:no.therapy-anxifree:no.therapy  1.06666667  0.426738939
## placebo:CBT-anxifree:no.therapy          0.20000000 -0.439927728
## anxifree:CBT-anxifree:no.therapy         0.63333333 -0.006594394
## joyzepam:CBT-anxifree:no.therapy         1.10000000  0.460072272
## placebo:CBT-joyzepam:no.therapy         -0.86666667 -1.506594394
## anxifree:CBT-joyzepam:no.therapy        -0.43333333 -1.073261061
## joyzepam:CBT-joyzepam:no.therapy         0.03333333 -0.606594394
## anxifree:CBT-placebo:CBT                 0.43333333 -0.206594394
## joyzepam:CBT-placebo:CBT                 0.90000000  0.260072272
## joyzepam:CBT-anxifree:CBT                0.46666667 -0.173261061
##                                                upr     p adj
## anxifree:no.therapy-placebo:no.therapy   0.7399277 0.9940083
## joyzepam:no.therapy-placebo:no.therapy   1.8065944 0.0005667
## placebo:CBT-placebo:no.therapy           0.9399277 0.6280049
## anxifree:CBT-placebo:no.therapy          1.3732611 0.0218746
## joyzepam:CBT-placebo:no.therapy          1.8399277 0.0004380
## joyzepam:no.therapy-anxifree:no.therapy  1.7065944 0.0012553
## placebo:CBT-anxifree:no.therapy          0.8399277 0.8917157
## anxifree:CBT-anxifree:no.therapy         1.2732611 0.0529812
## joyzepam:CBT-anxifree:no.therapy         1.7399277 0.0009595
## placebo:CBT-joyzepam:no.therapy         -0.2267389 0.0067639
## anxifree:CBT-joyzepam:no.therapy         0.2065944 0.2750590
## joyzepam:CBT-joyzepam:no.therapy         0.6732611 0.9999703
## anxifree:CBT-placebo:CBT                 1.0732611 0.2750590
## joyzepam:CBT-placebo:CBT                 1.5399277 0.0050693
## joyzepam:CBT-anxifree:CBT                1.1065944 0.2139229
library(multcomp)
summary(glht(model.3, lincft = mcp(drug )))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: aov(formula = mood.gain ~ drug * therapy, data = clin.trial)
## 
## Linear Hypotheses:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) == 0               0.3000     0.1347   2.227    0.157    
## druganxifree == 0              0.1000     0.1905   0.525    0.973    
## drugjoyzepam == 0              1.1667     0.1905   6.124   <0.001 ***
## therapyCBT == 0                0.3000     0.1905   1.575    0.410    
## druganxifree:therapyCBT == 0   0.3333     0.2694   1.237    0.611    
## drugjoyzepam:therapyCBT == 0  -0.2667     0.2694  -0.990    0.769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

FAKTORSKA ANOVA (nebalansirani dizajn)

library(car)
unbalanced.anova <- aov(mood.gain~drug*therapy, clin.trial)
Anova(unbalanced.anova, type = "III")
## Anova Table (Type III tests)
## 
## Response: mood.gain
##               Sum Sq Df F value    Pr(>F)    
## (Intercept)  0.27000  1  4.9592   0.04586 *  
## drug         2.50889  2 23.0408 7.778e-05 ***
## therapy      0.13500  1  2.4796   0.14131    
## drug:therapy 0.27111  2  2.4898   0.12460    
## Residuals    0.65333 12                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1