dr.sc. Luka Šikić
15 prosinac, 2019
## 'data.frame': 18 obs. of 3 variables:
## $ drug : Factor w/ 3 levels "placebo","anxifree",..: 1 1 1 2 2 2 3 3 3 1 ...
## $ therapy : Factor w/ 2 levels "no.therapy","CBT": 1 1 1 1 1 1 1 1 1 2 ...
## $ mood.gain: num 0.5 0.3 0.1 0.6 0.4 0.2 1.4 1.7 1.3 0.6 ...
## drug therapy mood.gain
## 1 placebo no.therapy 0.5
## 2 placebo no.therapy 0.3
## 3 placebo no.therapy 0.1
## 4 anxifree no.therapy 0.6
## 5 anxifree no.therapy 0.4
## 6 anxifree no.therapy 0.2
## 7 joyzepam no.therapy 1.4
## 8 joyzepam no.therapy 1.7
## 9 joyzepam no.therapy 1.3
## 10 placebo CBT 0.6
## 11 placebo CBT 0.9
## 12 placebo CBT 0.3
## 13 anxifree CBT 1.1
## 14 anxifree CBT 0.8
## 15 anxifree CBT 1.2
## 16 joyzepam CBT 1.8
## 17 joyzepam CBT 1.3
## 18 joyzepam CBT 1.4
## drug
## placebo anxifree joyzepam
## 6 6 6
## drug mood.gain
## 1 placebo 0.4500000
## 2 anxifree 0.7166667
## 3 joyzepam 1.4833333
## drug mood.gain
## 1 placebo 0.2810694
## 2 anxifree 0.3920034
## 3 joyzepam 0.2136976
library(gplots) # Učitaj paket
plotmeans( formula = mood.gain ~ drug, # Porast raspoloženja vs. ljek
data = clin.trial, # Podatci
xlab = "Primjenjeni ljek",
ylab = "Rast raspoloženja",
n.label = FALSE # Ne prikazuj veličinu uzorka
)\[ \begin{array}{rcl} H_0 &:& \mbox{točno je } \mu_P = \mu_A = \mu_J \end{array} \]
\[ \begin{array}{rcl} H_1 &:& \mbox{nije točno } \mu_P = \mu_A = \mu_J \end{array} \]
\[ \mbox{Var}(Y) = \frac{1}{N} \sum_{k=1}^G \sum_{i=1}^{N_k} \left(Y_{ik} - \bar{Y} \right)^2 \]
| ime | osoba (\(p\)) | grupa | broj grupe (\(k\)) | index grupe (\(i\)) | raspolozenje (\(Y_{ik}\) or \(Y_p\)) |
|---|---|---|---|---|---|
| Ann | 1 | cool | 1 | 1 | 20 |
| Ben | 2 | cool | 1 | 2 | 55 |
| Cat | 3 | cool | 1 | 3 | 21 |
| Dan | 4 | uncool | 2 | 1 | 91 |
| Egg | 5 | uncool | 2 | 2 | 22 |
\[ \mbox{Var}(Y) = \frac{1}{N} \sum_{p=1}^N \left(Y_{p} - \bar{Y} \right)^2 \]
\[ \mbox{SS}_{tot} = \sum_{k=1}^G \sum_{i=1}^{N_k} \left(Y_{ik} - \bar{Y} \right)^2 \] - Suma kvadrata odstupanja unutar grupe
\[ \mbox{SS}_w = \sum_{k=1}^G \sum_{i=1}^{N_k} \left( Y_{ik} - \bar{Y}_k \right)^2 \]
\[ \begin{array}{rcl} \mbox{SS}_{b} &=& \sum_{k=1}^G \sum_{i=1}^{N_k} \left( \bar{Y}_k - \bar{Y} \right)^2 \\ &=& \sum_{k=1}^G N_k \left( \bar{Y}_k - \bar{Y} \right)^2 \end{array} \]
\[ \mbox{SS}_w + \mbox{SS}_{b} = \mbox{SS}_{tot} \]
Prikaz varijacije između grupa.
Prikaz varijacije unutar grupa.
\[ \begin{array}{lcl} \mbox{df}_b &=& G-1 \\ \mbox{df}_w &=& N-G \\ \end{array} \]
\[ \begin{array}{lcl} \mbox{MS}_b &=& \displaystyle\frac{\mbox{SS}_b }{ \mbox{df}_b} \\ \mbox{MS}_w &=& \displaystyle\frac{\mbox{SS}_w }{ \mbox{df}_w} \end{array} \]
\[ F = \frac{\mbox{MS}_b }{ \mbox{MS}_w } \]
| df | suma kvadrata | kvadrati odstpanja od prosjeka | \(F\) statistika | \(p\) vrijednost | |
|---|---|---|---|---|---|
| izmedju grupa | \(\mbox{df}_b = G-1\) | SS\(_b = \displaystyle\sum_{k=1}^G N_k (\bar{Y}_k - \bar{Y})^2\) | \(\mbox{MS}_b = \frac{\mbox{SS}_b}{\mbox{df}_b}\) | \(F = \frac{\mbox{MS}_b }{ \mbox{MS}_w }\) | [komplicirano] |
| unutar grupa | \(\mbox{df}_w = N-G\) | SS\(_w = \sum_{k=1}^G \sum_{i = 1}^{N_k} ( {Y}_{ik} - \bar{Y}_k)^2\) | \(\mbox{MS}_w = \frac{\mbox{SS}_w}{\mbox{df}_w}\) | - | - |
| Grupa (\(k\)) | Ishod (\(Y_{ik}\)) |
|---|---|
| placebo | 0.5 |
| placebo | 0.3 |
| placebo | 0.1 |
| anxifree | 0.6 |
| anxifree | 0.4 |
| Grupa (\(k\)) | Ishod (\(Y_{ik}\)) | Grupni prosjek (\(\bar{Y}_k\)) |
|---|---|---|
| placebo | 0.5 | 0.45 |
| placebo | 0.3 | 0.45 |
| placebo | 0.1 | 0.45 |
| anxifree | 0.6 | 0.72 |
| anxifree | 0.4 | 0.72 |
| Grupa (\(k\)) | Ishod (\(Y_{ik}\)) | Grupni prosjek (\(\bar{Y}_k\)) | dodstupanje od gr. prosjeka (\(Y_{ik} - \bar{Y}_{k}\)) | Kvadrat odstupanja (\((Y_{ik} - \bar{Y}_{k})^2\)) |
|---|---|---|---|---|
| placebo | 0.5 | 0.45 | 0.05 | 0.0025 |
| placebo | 0.3 | 0.45 | -0.15 | 0.0225 |
| placebo | 0.1 | 0.45 | -0.35 | 0.1225 |
| anxifree | 0.6 | 0.72 | -0.12 | 0.0136 |
| anxifree | 0.4 | 0.72 | -0.32 | 0.1003 |
\[ \begin{array}{rcl} \mbox{SS}_w &=& 0.0025 + 0.0225 + 0.1225 + 0.0136 + 0.1003 \\ &=& 0.2614 \end{array} \]
# Definiraj varijable
outcome <- clin.trial$mood.gain
group <- clin.trial$drug
gp.means <- tapply(outcome,group,mean)
gp.means <- gp.means[group]
dev.from.gp.means <- outcome - gp.means
squared.devs <- dev.from.gp.means ^2# Poveži u Data Frame
Y <- data.frame( group, outcome, gp.means,
dev.from.gp.means, squared.devs )
print(Y, digits = 2)## group outcome gp.means dev.from.gp.means squared.devs
## 1 placebo 0.5 0.45 0.050 0.0025
## 2 placebo 0.3 0.45 -0.150 0.0225
## 3 placebo 0.1 0.45 -0.350 0.1225
## 4 anxifree 0.6 0.72 -0.117 0.0136
## 5 anxifree 0.4 0.72 -0.317 0.1003
## 6 anxifree 0.2 0.72 -0.517 0.2669
## 7 joyzepam 1.4 1.48 -0.083 0.0069
## 8 joyzepam 1.7 1.48 0.217 0.0469
## 9 joyzepam 1.3 1.48 -0.183 0.0336
## 10 placebo 0.6 0.45 0.150 0.0225
## 11 placebo 0.9 0.45 0.450 0.2025
## 12 placebo 0.3 0.45 -0.150 0.0225
## 13 anxifree 1.1 0.72 0.383 0.1469
## 14 anxifree 0.8 0.72 0.083 0.0069
## 15 anxifree 1.2 0.72 0.483 0.2336
## 16 joyzepam 1.8 1.48 0.317 0.1003
## 17 joyzepam 1.3 1.48 -0.183 0.0336
## 18 joyzepam 1.4 1.48 -0.083 0.0069
## [1] 1.391667
| Grupa (\(k\)) | Group mean (\(\bar{Y}_k\)) | Grand mean (\(\bar{Y}\)) | devijacija (\(\bar{Y}_{k} - \bar{Y}\)) | Kvadrirane devijacije (\((\bar{Y}_{k} - \bar{Y})^2\)) |
|---|---|---|---|---|
| placebo | 0.45 | 0.88 | -0.43 | 0.18 |
| anxifree | 0.72 | 0.88 | -0.16 | 0.03 |
| joyzepam | 1.48 | 0.88 | 0.60 | 0.36 |
7.Pomnoži sa veličinom uzorka
| Grupa (\(k\)) | Kvadrirane devijacije (\((\bar{Y}_{k} - \bar{Y})^2\)) | Veličina uzorka (\(N_k\)) | Ponderirani kv. odst. (\(N_k (\bar{Y}_{k} - \bar{Y})^2\)) |
|---|---|---|---|
| placebo | 0.18 | 6 | 1.11 |
| anxifree | 0.03 | 6 | 0.16 |
| joyzepam | 0.36 | 6 | 2.18 |
\[ \begin{array}{rcl} \mbox{SS}_{b} &=& 1.11 + 0.16 + 2.18 \\ &=& 3.45 \end{array} \]
# Definiraj varijable
gp.means <- tapply(outcome,group,mean)
grand.mean <- mean(outcome)
dev.from.grand.mean <- gp.means - grand.mean
squared.devs <- dev.from.grand.mean ^2
gp.sizes <- tapply(outcome,group,length)
wt.squared.devs <- gp.sizes * squared.devs# Poveži u DF
Y <- data.frame( gp.means, grand.mean, dev.from.grand.mean,
squared.devs, gp.sizes, wt.squared.devs )
print(Y, digits = 2)## gp.means grand.mean dev.from.grand.mean squared.devs gp.sizes
## placebo 0.45 0.88 -0.43 0.188 6
## anxifree 0.72 0.88 -0.17 0.028 6
## joyzepam 1.48 0.88 0.60 0.360 6
## wt.squared.devs
## placebo 1.13
## anxifree 0.17
## joyzepam 2.16
## [1] 3.453333
\[ \begin{array}{lclcl} \mbox{df}_b &=& G - 1 &=& 2 \\ \mbox{df}_w &=& N - G &=& 15 \end{array} \]
\[ \begin{array}{lclclcl} \mbox{MS}_b &=& \displaystyle\frac{\mbox{SS}_b }{ \mbox{df}_b } &=& \displaystyle\frac{3.45}{ 2} &=& 1.73 \end{array} \] \[ \begin{array}{lclclcl} \mbox{MS}_w &=& \displaystyle\frac{\mbox{SS}_w }{ \mbox{df}_w } &=& \displaystyle\frac{1.39}{15} &=& 0.09 \end{array} \]
\[ F \ = \ \frac{\mbox{MS}_b }{ \mbox{MS}_w } \ = \ \frac{1.73}{0.09} \ = \ 18.6 \]
## [1] 8.672727e-05
Prikaži rezultate u tablici
| df | suma | kvadrata sred | nji kvadrati \(F\)- | statistika \(p\)- | vrijednost |
|---|---|---|---|---|---|
| between groups | 2 | 3.45 | 1.73 | 18.6 | \(8.67 \times 10^{-5}\) |
| within groups | 15 | 1.39 | 0.09 | - | - |
# Različiti načini za provođenje ANOVA postupka
aov( formula = mood.gain ~ drug, data = clin.trial ) # Puna specifikacija
aov( clin.trial$mood.gain ~ clin.trial$drug ) # Definiranje varijabli
aov( mood.gain ~ drug, clin.trial ) # Skarćeno;definirani df## [1] "aov" "lm"
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "contrasts" "xlevels" "call" "terms"
## [13] "model"
## Call:
## aov(formula = mood.gain ~ drug, data = clin.trial)
##
## Terms:
## drug Residuals
## Sum of Squares 3.453333 1.391667
## Deg. of Freedom 2 15
##
## Residual standard error: 0.3045944
## Estimated effects may be unbalanced
## Df Sum Sq Mean Sq F value Pr(>F)
## drug 2 3.453 1.7267 18.61 8.65e-05 ***
## Residuals 15 1.392 0.0928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[ \eta^2 = \frac{\mbox{SS}_b}{\mbox{SS}_{tot}} \]
\[ \eta^2 = \frac{3.45}{4.84} = 0.71 \]
\[ \eta= \sqrt{\frac{\mbox{SS}_b}{\mbox{SS}_{tot}}} \]
SStot <- SSb + SSw # Ukupna suma kvadrata
eta.squared <- SSb / SStot # Eta kvadrat
print( eta.squared ) # Prikaži rezultate## [1] 0.7127623
## eta.sq eta.sq.part
## drug 0.7127623 0.7127623
\[ \begin{array}{lrcl} H_0: & Y_{ik} &=& \mu + \epsilon_{ik} \\ H_1: & Y_{ik} &=& \mu_k + \epsilon_{ik} \end{array} \]
\[ \epsilon_{ik} \sim \mbox{Normal}(0, \sigma^2) \]
\[ Z_{ik} = \left| Y_{ik} - \bar{Y}_k \right| \]
\[ Z_{ik} = \left| Y_{ik} - \mbox{median}_k(Y) \right| \]
## Loading required package: carData
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.4672 0.2618
## 15
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 2 1.4497 0.2657
## 15
# Provedi test korak po korak
Y <- clin.trial $ mood.gain # Ishod
G <- clin.trial $ drug # Grupa
gp.mean <- tapply(Y, G, mean) # Izračunaj grupne prosjeke
Ybar <- gp.mean[G] # Izvadi grupne prosjeke vezane uz svaku ops
Z <- abs(Y - Ybar) # Transformirana varijabla
summary( aov(Z ~ G) ) # Izvrši ANOVA-u ## Df Sum Sq Mean Sq F value Pr(>F)
## G 2 0.0616 0.03080 1.45 0.266
## Residuals 15 0.3187 0.02125
##
## One-way analysis of means (not assuming equal variances)
##
## data: mood.gain and drug
## F = 26.322, num df = 2.0000, denom df = 9.4932, p-value = 0.000134
# Usporedi sa rezultatima testa za jednake varijance
oneway.test(mood.gain ~ drug, data = clin.trial, var.equal = TRUE)##
## One-way analysis of means
##
## data: mood.gain and drug
## F = 18.611, num df = 2, denom df = 15, p-value = 8.646e-05
##
## Shapiro-Wilk normality test
##
## data: my.anova.residuals
## W = 0.96019, p-value = 0.6053
\[ \bar{R}_k = \frac{1}{N_K} \sum_{i} R_{ik} \]
\[ \bar{R} = \frac{1}{N} \sum_{i} \sum_{k} R_{ik} \]
\[ \mbox{RSS}_{tot} = \sum_k \sum_i ( R_{ik} - \bar{R} )^2 \]
\[ \begin{array}{rcl} \mbox{RSS}_{b} &=& \sum_k \sum_i ( \bar{R}_k - \bar{R} )^2 \\ &=& \sum_k N_k ( \bar{R}_k - \bar{R} )^2 \end{array} \]
\[ K = (N - 1) \times \frac{\mbox{RSS}_b}{\mbox{RSS}_{tot}} \]
\[ K = \frac{12}{N(N-1)} \sum_k N_k {\bar{R}_k}^2 - 3(N+1) \]
##
## 0.1 0.2 0.3 0.4 0.5 0.6 0.8 0.9 1.1 1.2 1.3 1.4 1.7 1.8
## 1 1 2 1 1 2 1 1 1 1 2 2 1 1
##
## Kruskal-Wallis rank sum test
##
## data: mood.gain by drug
## Kruskal-Wallis chi-squared = 12.076, df = 2, p-value = 0.002386
##
## Kruskal-Wallis rank sum test
##
## data: clin.trial$mood.gain and clin.trial$drug
## Kruskal-Wallis chi-squared = 12.076, df = 2, p-value = 0.002386
## Df Sum Sq Mean Sq F value Pr(>F)
## therapy 1 0.467 0.4672 1.708 0.21
## Residuals 16 4.378 0.2736
##
## Two Sample t-test
##
## data: mood.gain by therapy
## t = -1.3068, df = 16, p-value = 0.2098
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.8449518 0.2005073
## sample estimates:
## mean in group no.therapy mean in group CBT
## 0.7222222 1.0444444
## [1] 1.707726