Anova 3-czynnikowa

Wprowadzenie

Trójczynnikowa ANOVA jest rozszerzeniem dwukierunkowej ANOVA do oceny, czy istnieje efekt interakcji pomiędzy trzema niezależnymi zmiennymi kategorycznymi (jakościowymi) na ciągłą zmienną wynikową (ilościową).

Wykorzystamy zbiór danych headache [z pakietu datarium], który zawiera ankietę dot. miary bólu w epizodzie migrenowego bólu głowy u 72 uczestników leczonych trzema różnymi metodami. Wśród uczestników jest 36 mężczyzn i 36 kobiet. Mężczyźni i kobiety zostali dalej podzieleni na grupy o niskim lub wysokim ryzyku migreny.

Chcemy zrozumieć, jak każda niezależna zmienna (rodzaj leczenia, ryzyko migreny i płeć) współdziałają w przewidywaniu wyniku bólu.

Statystyki opisowe

raport <- list(
  "Ból w epizodzie migrenowego bólu głowy" =
    list(
      "Min" = ~ min(pain_score),
      "Max" = ~ max(pain_score),
      "Q1"  = ~ quantile(pain_score, 0.25),
      "Mediana" = ~ round(median(pain_score,0.5),2),
      "Q3"  = ~ quantile(pain_score, 0.75),
      "Średnia" = ~ round(mean(pain_score),2),
      "Odch. std." = ~ round(sd(pain_score),2)
    )  
)
tabela<-summary_table(headache, summaries=raport, by=c("gender", "risk", "treatment"))
kbl(tabela, digits=2, caption="Statystyki opisowe") %>%
  kable_classic(full_width=F) %>%
  kable_styling(bootstrap_options=c("striped","hover"))

Statystyki opisowe
	male.high.X (N = 6)	female.high.X (N = 6)	male.low.X (N = 6)	female.low.X (N = 6)	male.high.Y (N = 6)	female.high.Y (N = 6)	male.low.Y (N = 6)	female.low.Y (N = 6)	male.high.Z (N = 6)	female.high.Z (N = 6)	male.low.Z (N = 6)	female.low.Z (N = 6)
Min	86.29	68.36	70.83	68.61	77.5	73.14	67.92	63.73	74.42	74.99	68.30	65.45
Max	100.00	82.66	81.16	78.07	91.2	86.59	80.68	74.75	85.06	87.14	80.43	73.10
Q1	88.91	79.14	73.59	72.01	79.0	80.01	69.46	65.24	76.60	79.81	70.71	68.87
Mediana	93.39	81.06	75.95	74.62	81.2	81.81	73.36	68.73	80.37	80.84	74.85	69.59
Q3	95.30	81.43	78.69	77.06	83.9	83.67	74.86	69.80	81.98	82.41	77.95	71.65
Średnia	92.74	78.87	76.05	74.16	82.3	81.18	73.14	68.36	79.68	81.04	74.46	69.78
Odch. std.	5.12	5.32	3.85	3.69	5.0	4.62	4.77	4.08	4.05	3.98	4.89	2.72

ggplot(headache, aes(x=pain_score)) +
  geom_histogram(bins=10) +
  facet_grid(gender~treatment~risk)

ggplot(headache, aes(x=pain_score)) +
  geom_histogram(bins=10) +
  facet_grid(gender~treatment~risk)

pudelkowy<- ggboxplot(headache, x="gender",
          y="pain_score", color="risk")
pudelkowy

Założenia

Obserwacje odstające

headache %>% 
  group_by(treatment, risk, gender) %>%
  identify_outliers(pain_score)

## # A tibble: 4 x 7
##   gender risk  treatment    id pain_score is.outlier is.extreme
##   <fct>  <fct> <fct>     <int>      <dbl> <lgl>      <lgl>     
## 1 female high  X            57       68.4 TRUE       TRUE      
## 2 female high  Y            62       73.1 TRUE       FALSE     
## 3 female high  Z            67       75.0 TRUE       FALSE     
## 4 female high  Z            71       87.1 TRUE       FALSE

Normalność

headache %>% 
  group_by(treatment, risk, gender) %>%
  shapiro_test(pain_score)

## # A tibble: 12 x 6
##    gender risk  treatment variable   statistic       p
##    <fct>  <fct> <fct>     <chr>          <dbl>   <dbl>
##  1 male   high  X         pain_score     0.958 0.808  
##  2 female high  X         pain_score     0.714 0.00869
##  3 male   low   X         pain_score     0.982 0.962  
##  4 female low   X         pain_score     0.933 0.600  
##  5 male   high  Y         pain_score     0.902 0.384  
##  6 female high  Y         pain_score     0.939 0.654  
##  7 male   low   Y         pain_score     0.920 0.507  
##  8 female low   Y         pain_score     0.927 0.555  
##  9 male   high  Z         pain_score     0.955 0.784  
## 10 female high  Z         pain_score     0.971 0.901  
## 11 male   low   Z         pain_score     0.924 0.535  
## 12 female low   Z         pain_score     0.958 0.801

ggqqplot(headache, "pain_score") +
  facet_grid(treatment~risk~gender)

Jednorodność wariancji

wyniki<-headache %>%
  levene_test(pain_score~treatment*risk*gender)
wyniki

## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1    11    60     0.179 0.998

###są jednorodne

Anova

wynikiAN<-headache %>%
 anova_test(pain_score~gender*risk*treatment)

## Coefficient covariances computed by hccm()

wynikiAN

## ANOVA Table (type II tests)
## 
##                  Effect DFn DFd      F                 p p<.05   ges
## 1                gender   1  60 16.196 0.000163000000000     * 0.213
## 2                  risk   1  60 92.699 0.000000000000088     * 0.607
## 3             treatment   2  60  7.318 0.001000000000000     * 0.196
## 4           gender:risk   1  60  0.141 0.708000000000000       0.002
## 5      gender:treatment   2  60  3.338 0.042000000000000     * 0.100
## 6        risk:treatment   2  60  0.713 0.494000000000000       0.023
## 7 gender:risk:treatment   2  60  7.406 0.001000000000000     * 0.198

istnieje interakcja między płcią, lekiem i ryzykiem(naraz),

Testy post-hoc

wynik2<- headache %>%
  group_by(gender, risk) %>%
  tukey_hsd(pain_score ~ treatment) %>%
select(-null.value, - conf.low, - conf.high)
wynik2

## # A tibble: 12 x 8
##    gender risk  term      group1 group2 estimate    p.adj p.adj.signif
##    <fct>  <fct> <chr>     <chr>  <chr>     <dbl>    <dbl> <chr>       
##  1 male   high  treatment X      Y       -10.4   0.00471  **          
##  2 male   high  treatment X      Z       -13.1   0.000688 ***         
##  3 male   high  treatment Y      Z        -2.66  0.605    ns          
##  4 male   low   treatment X      Y        -2.91  0.52     ns          
##  5 male   low   treatment X      Z        -1.60  0.817    ns          
##  6 male   low   treatment Y      Z         1.32  0.871    ns          
##  7 female high  treatment X      Y         2.31  0.675    ns          
##  8 female high  treatment X      Z         2.17  0.706    ns          
##  9 female high  treatment Y      Z        -0.140 0.999    ns          
## 10 female low   treatment X      Y        -5.79  0.0319   *           
## 11 female low   treatment X      Z        -4.38  0.115    ns          
## 12 female low   treatment Y      Z         1.42  0.771    ns

###Różnice pomiędzy parami średnich w osiąganym wyniku pain_score okazały się istotne statystycznie jedynie w przypadku 3 par, dla mężczyzn o wysokim ryzyku i leku X a leku Y oraz leku X a leku Z oraz dla kobiet o niskim zagrożeniu pomiędzy zastosowaniem leku X a leku Y

###Wyniki te widać na poniższym wykresie, interakcje między czynnikami są istotne, F=7.41 , p=0,001

wynik2 <- wynik2 %>% add_xy_position(x="gender")
pudelkowy +
  stat_pvalue_manual(wynik2) +
  labs(
    subtitle=get_test_label(wynikiAN,detailed=TRUE),
    caption=get_pwc_label(wynik2)
  )

attach(headache)
interaction.plot(treatment,gender, pain_score,type="b",col=c(2:3),leg.bty="o ",leg.bg="beige",lwd=3,pch=c(18,24) ,xlab="Treatment",ylab="Pain score",main="Wykres interakcji")

interaction.plot(risk,gender, pain_score,type="b",col=c(2:3),leg.bty="o ",leg.bg="beige",lwd=3,pch=c(18,24) ,xlab="Risk",ylab="Pain score",main="Wykres interakcji")

interaction.plot(treatment,risk, pain_score,type="b",col=c(2:3),leg.bty="o ",leg.bg="beige",lwd=3,pch=c(18,24) ,xlab="Treatment",ylab="Pain score",main="Wykres interakcji")

###nie wiedziałem jak zrobić wykres interakcji dla 4 zmiennych naraz, dlatego zrobiłem 3 dla 3 zmiennych