PROBLEMA 1

ANOVA de 1 via Medidas repetidas

# Interactive helper: opens a file-selection dialog and prints the chosen path
# (the returned path is not stored in a variable).
file.choose()

## [1] "C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 1.csv"

# Load the Problem 1 data from the CSV file at the path printed above.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
df<-read.csv("C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 1.csv")
# Print the loaded data frame.
df

##   id t1 t2 t3 t4
## 1  1 16  8  8 12
## 2  2 12  9  9 10
## 3  3 12 10 10  8
## 4  4 15 13  7 11
## 5  5 18 12 12 12
## 6  6 13 13  8 10
## 7  7 18 16 10 13
## 8  8 15  9  6  6
## 9  9 20  9 11  8

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggpubr)
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

# Reshape from wide to long: columns t1..t4 are stacked into a "time" key
# column and a "score" value column; id and time are then converted to factors.
df <- df %>%
  gather(key = "time", value = "score", t1, t2, t3, t4) %>%
  convert_as_factor(id, time)
# Print the long-format data.
df

##    id time score
## 1   1   t1    16
## 2   2   t1    12
## 3   3   t1    12
## 4   4   t1    15
## 5   5   t1    18
## 6   6   t1    13
## 7   7   t1    18
## 8   8   t1    15
## 9   9   t1    20
## 10  1   t2     8
## 11  2   t2     9
## 12  3   t2    10
## 13  4   t2    13
## 14  5   t2    12
## 15  6   t2    13
## 16  7   t2    16
## 17  8   t2     9
## 18  9   t2     9
## 19  1   t3     8
## 20  2   t3     9
## 21  3   t3    10
## 22  4   t3     7
## 23  5   t3    12
## 24  6   t3     8
## 25  7   t3    10
## 26  8   t3     6
## 27  9   t3    11
## 28  1   t4    12
## 29  2   t4    10
## 30  3   t4     8
## 31  4   t4    11
## 32  5   t4    12
## 33  6   t4    10
## 34  7   t4    13
## 35  8   t4     6
## 36  9   t4     8

RESUMEN ESTADISTICO

# Mean and standard deviation of score at each time point.
df %>%
  group_by(time) %>%
  get_summary_stats(score, type = "mean_sd")

## # A tibble: 4 x 5
##   time  variable     n  mean    sd
##   <fct> <chr>    <dbl> <dbl> <dbl>
## 1 t1    score        9  15.4  2.83
## 2 t2    score        9  11    2.65
## 3 t3    score        9   9    1.94
## 4 t4    score        9  10    2.29

Visualización de los datos

# Box plot of score by time; stored in bxp so it can be reused later.
bxp <- ggboxplot(df, x = "time", y = "score")
bxp

SUPUESTOS DEL MODELO

#Outliers

# Look for outlying scores within each time point.
df %>%
  group_by(time) %>%
  identify_outliers(score)

## [1] time       id         score      is.outlier is.extreme
## <0 rows> (or 0-length row.names)

No hay outliers en los datos, según el estadístico para identificar valores atípicos.

Normalidad

# Shapiro-Wilk normality test of score within each time point.
df %>%
  group_by(time) %>%
  shapiro_test(score)

## # A tibble: 4 x 4
##   time  variable statistic     p
##   <fct> <chr>        <dbl> <dbl>
## 1 t1    score        0.933 0.511
## 2 t2    score        0.896 0.231
## 3 t3    score        0.978 0.951
## 4 t4    score        0.947 0.653

Todos los p>0.05, los datos son normales.

# Q-Q plot of score, one panel per time point.
ggqqplot(df, "score", facet.by = "time")

Los gráficos Q-Q para T1, T2, T3 y T4 confirman el resultado de normalidad de la prueba de Shapiro.

Cálculo del ANOVA

# One-way repeated-measures ANOVA: score is the dependent variable, id the
# subject identifier and time the within-subject factor.
res.aov <- anova_test(data = df, dv = score, wid = id, within = time)
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##   Effect DFn DFd      F        p p<.05   ges
## 1   time   3  24 17.331 3.34e-06     * 0.532

El ANOVA arroja un valor p de 3.34e-06, el cual es significativo por ser menor que alpha = 0.05. Rechazamos la Ho de igualdad de medias y aceptamos la Ha: entre los tiempos hay diferencias significativas. Nos resta saber cuáles son los tiempos que producen la diferencia.

Comparaciones de medias

# Paired t-tests between every pair of time points, Bonferroni-adjusted.
pwc <- df %>%
  pairwise_t_test(
    score ~ time,
    paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc

## # A tibble: 6 x 10
##   .y.   group1 group2    n1    n2 statistic    df         p    p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>     <dbl>    <dbl> <chr>       
## 1 score t1     t2         9     9      3.77     8 0.005     0.033    *           
## 2 score t1     t3         9     9      7.43     8 0.0000743 0.000446 ***         
## 3 score t1     t4         9     9      5.16     8 0.000866  0.005    **          
## 4 score t2     t3         9     9      1.97     8 0.084     0.504    ns          
## 5 score t2     t4         9     9      1.28     8 0.237     1        ns          
## 6 score t3     t4         9     9     -1.2      8 0.264     1        ns

Las 3 primeras comparaciones (T1-T2, T1-T3 y T1-T4) tienen p.adj menores de 0.05; por tanto, sus medias son significativamente distintas.

REPORTE

# Add plotting coordinates to the pairwise results, using time as the x axis.
pwc <- add_xy_position(pwc, x = "time")
# Box plot annotated with the pairwise p-values; the ANOVA result goes in the
# subtitle and the pairwise-comparison description in the caption.
bxp +
  stat_pvalue_manual(pwc) +
  labs(
    subtitle = get_test_label(res.aov, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

Se confirma el resultado del Bonferroni T1-T2, T1-T3 y T1-T4, son significativamente distintas.

CONCLUSIÓN La calidad del recuerdo no es la misma en los cuatro momentos temporales que definimos como factor tiempo.

PROBLEMA 2

# Interactive helper: opens a file-selection dialog and prints the chosen path
# (the returned path is not stored in a variable).
file.choose()

## [1] "C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 2.csv"

# Load the Problem 2 data from the CSV file at the path printed above.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
df<-read.csv("C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 2.csv")
# Print the loaded data frame.
df

##    ID tratamiento T1 T2 T3 T4
## 1   1           N  6  6  3  2
## 2   2           N  7  5  5  5
## 3   3           N  4  2  1  3
## 4   4           N  7  5  3  4
## 5   5           N  6  4  4  5
## 6   6           N  5  2  1  1
## 7   1           L  8  6  4  3
## 8   2           L 10  8  5  2
## 9   3           L  7  7  2  2
## 10  4           L 11  9  3  6
## 11  5           L 10  6  4  3
## 12  6           L  9  4  3  5

library(tidyverse)
library(ggpubr)
library(rstatix)

# Reshape from wide to long: columns T1..T4 are stacked into a "time" key
# column and a "score" value column; ID and time are then converted to factors.
df <- df %>%
  gather(key = "time", value = "score", T1, T2, T3, T4) %>%
  convert_as_factor(ID, time)
# Print the long-format data.
df

##    ID tratamiento time score
## 1   1           N   T1     6
## 2   2           N   T1     7
## 3   3           N   T1     4
## 4   4           N   T1     7
## 5   5           N   T1     6
## 6   6           N   T1     5
## 7   1           L   T1     8
## 8   2           L   T1    10
## 9   3           L   T1     7
## 10  4           L   T1    11
## 11  5           L   T1    10
## 12  6           L   T1     9
## 13  1           N   T2     6
## 14  2           N   T2     5
## 15  3           N   T2     2
## 16  4           N   T2     5
## 17  5           N   T2     4
## 18  6           N   T2     2
## 19  1           L   T2     6
## 20  2           L   T2     8
## 21  3           L   T2     7
## 22  4           L   T2     9
## 23  5           L   T2     6
## 24  6           L   T2     4
## 25  1           N   T3     3
## 26  2           N   T3     5
## 27  3           N   T3     1
## 28  4           N   T3     3
## 29  5           N   T3     4
## 30  6           N   T3     1
## 31  1           L   T3     4
## 32  2           L   T3     5
## 33  3           L   T3     2
## 34  4           L   T3     3
## 35  5           L   T3     4
## 36  6           L   T3     3
## 37  1           N   T4     2
## 38  2           N   T4     5
## 39  3           N   T4     3
## 40  4           N   T4     4
## 41  5           N   T4     5
## 42  6           N   T4     1
## 43  1           L   T4     3
## 44  2           L   T4     2
## 45  3           L   T4     2
## 46  4           L   T4     6
## 47  5           L   T4     3
## 48  6           L   T4     5

RESUMEN ESTADISTICO

# Mean and standard deviation of score for each tratamiento x time cell.
df %>%
  group_by(tratamiento, time) %>%
  get_summary_stats(score, type = "mean_sd")

## # A tibble: 8 x 6
##   tratamiento time  variable     n  mean    sd
##   <chr>       <fct> <chr>    <dbl> <dbl> <dbl>
## 1 L           T1    score        6  9.17  1.47
## 2 L           T2    score        6  6.67  1.75
## 3 L           T3    score        6  3.5   1.05
## 4 L           T4    score        6  3.5   1.64
## 5 N           T1    score        6  5.83  1.17
## 6 N           T2    score        6  4     1.67
## 7 N           T3    score        6  2.83  1.60
## 8 N           T4    score        6  3.33  1.63

Visualización de los datos

# Box plot of score by time, coloured by tratamiento with the "jco" palette.
bxp <- ggboxplot(
  df,
  x = "time",
  y = "score",
  color = "tratamiento",
  palette = "jco"
)
bxp

SUPUESTOS DEL MODELO

#Outliers

# Look for outlying scores within each tratamiento x time cell.
df %>%
  group_by(tratamiento, time) %>%
  identify_outliers(score)

## [1] tratamiento time        ID          score       is.outlier  is.extreme 
## <0 rows> (or 0-length row.names)

No hay valores atípicos en los datos, según el estadístico.

Normalidad

# Shapiro-Wilk normality test of score within each tratamiento x time cell.
df %>%
  group_by(tratamiento, time) %>%
  shapiro_test(score)

## # A tibble: 8 x 5
##   tratamiento time  variable statistic     p
##   <chr>       <fct> <chr>        <dbl> <dbl>
## 1 L           T1    score        0.958 0.804
## 2 L           T2    score        0.974 0.918
## 3 L           T3    score        0.960 0.820
## 4 L           T4    score        0.863 0.201
## 5 N           T1    score        0.908 0.421
## 6 N           T2    score        0.876 0.252
## 7 N           T3    score        0.908 0.425
## 8 N           T4    score        0.920 0.505

Como todos los p > 0.05, no se rechaza la Ho de normalidad: los score se distribuyen normalmente en todas las combinaciones de tratamiento y tiempo, de acuerdo con la prueba de Shapiro.

# Q-Q plots of score arranged in a grid: rows by time, columns by tratamiento,
# with panels labelled by both variable name and level.
ggqqplot(df, "score", ggtheme = theme_bw()) +
  facet_grid(time ~ tratamiento, labeller = "label_both")

ANOVA

# Two-way repeated-measures ANOVA: tratamiento and time are both declared as
# within-subject factors, with ID as the subject identifier.
res.aov <- anova_test(
  data = df,
  dv = score,
  wid = ID,
  within = c(tratamiento, time)
)
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##             Effect DFn DFd      F        p p<.05   ges
## 1      tratamiento   1   5 20.351 6.00e-03     * 0.275
## 2             time   3  15 38.058 2.98e-07     * 0.613
## 3 tratamiento:time   3  15  5.315 1.10e-02     * 0.186

La interacción entre los factores tratamiento y time es estadísticamente significativa, F(3,15) = 5.315, p = 0.011.

Pruebas Post-Hoc

Si la interacción es significativa

Efectos principales simples: correr un anova de una via con la primera variable.
Comparación pareada simple: correr multiples comparaciones pareadas para determinar cuales grupos son diferentes

# Simple main effects: at each time point, a one-way repeated-measures ANOVA of
# score on tratamiento, followed by Bonferroni adjustment of the p-values.
one.way <- df %>%
  group_by(time) %>%
  anova_test(dv = score, wid = ID, within = tratamiento) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
one.way

## # A tibble: 4 x 9
##   time  Effect        DFn   DFd       F        p `p<.05`   ges    p.adj
## * <fct> <chr>       <dbl> <dbl>   <dbl>    <dbl> <chr>   <dbl>    <dbl>
## 1 T1    tratamiento     1     5 100     0.000171 "*"     0.654 0.000684
## 2 T2    tratamiento     1     5  13.9   0.014    "*"     0.421 0.056   
## 3 T3    tratamiento     1     5   4     0.102    ""      0.068 0.408   
## 4 T4    tratamiento     1     5   0.024 0.883    ""      0.003 1

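Según el p.adj (Bonferroni), el efecto del tratamiento solo es significativo en T1 (p.adj = 0.000684); en T2 el p sin ajustar es significativo (p = 0.014), pero deja de serlo tras el ajuste (p.adj = 0.056).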

# Within each time point, paired t-test between the tratamiento levels.
pwc <- df %>%
  group_by(time) %>%
  pairwise_t_test(
    score ~ tratamiento,
    paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc

## # A tibble: 4 x 11
##   time  .y.   group1 group2    n1    n2 statistic    df        p    p.adj
## * <fct> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>    <dbl>    <dbl>
## 1 T1    score L      N          6     6    10         5 0.000171 0.000171
## 2 T2    score L      N          6     6     3.73      5 0.014    0.014   
## 3 T3    score L      N          6     6     2         5 0.102    0.102   
## 4 T4    score L      N          6     6     0.155     5 0.883    0.883   
## # ... with 1 more variable: p.adj.signif <chr>

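Para T1 y T2 las diferencias entre tratamientos son significativas (p = 0.000171 y p = 0.014). Aquí p.adj coincide con p porque solo hay una comparación por tiempo; con el ajuste de Bonferroni sobre los cuatro tiempos (tabla anterior), solo T1 se mantiene significativo.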

PROBLEMA 3

ANOVA de 3 vias (modelo de dos factores, con medidas repetidas de un factor)

# Interactive helper: opens a file-selection dialog and prints the chosen path
# (the returned path is not stored in a variable).
file.choose()

## [1] "C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 3.csv"

# Load the Problem 3 data from the CSV file at the path printed above.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
df<-read.csv("C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 3.csv")

library(tidyverse)
library(ggpubr)
library(rstatix)

# Reshape from wide to long: columns T1..T4 are stacked into a "time" key
# column and a "score" value column; ID and time are then converted to factors.
df <- df %>%
  gather(key = "time", value = "score", T1, T2, T3, T4) %>%
  convert_as_factor(ID, time)
# Print the long-format data.
df

##    ID Memory time score
## 1   1      R   T1    10
## 2   2      R   T1     9
## 3   3      R   T1     8
## 4   4      R   T1     7
## 5   5      R   T1    10
## 6   6     RA   T1     8
## 7   7     RA   T1     8
## 8   8     RA   T1     9
## 9   9     RA   T1     8
## 10 10     RA   T1     7
## 11 11     RL   T1     7
## 12 12     RL   T1     8
## 13 13     RL   T1     8
## 14 14     RL   T1     8
## 15 15     RL   T1     7
## 16  1      R   T2     8
## 17  2      R   T2     8
## 18  3      R   T2     6
## 19  4      R   T2     7
## 20  5      R   T2     9
## 21  6     RA   T2     6
## 22  7     RA   T2     7
## 23  8     RA   T2     7
## 24  9     RA   T2     6
## 25 10     RA   T2     5
## 26 11     RL   T2     5
## 27 12     RL   T2     6
## 28 13     RL   T2     6
## 29 14     RL   T2     5
## 30 15     RL   T2     5
## 31  1      R   T3     7
## 32  2      R   T3     7
## 33  3      R   T3     6
## 34  4      R   T3     6
## 35  5      R   T3     8
## 36  6     RA   T3     5
## 37  7     RA   T3     6
## 38  8     RA   T3     5
## 39  9     RA   T3     4
## 40 10     RA   T3     4
## 41 11     RL   T3     4
## 42 12     RL   T3     4
## 43 13     RL   T3     5
## 44 14     RL   T3     3
## 45 15     RL   T3     4
## 46  1      R   T4     8
## 47  2      R   T4     6
## 48  3      R   T4     7
## 49  4      R   T4     6
## 50  5      R   T4     8
## 51  6     RA   T4     3
## 52  7     RA   T4     5
## 53  8     RA   T4     6
## 54  9     RA   T4     4
## 55 10     RA   T4     5
## 56 11     RL   T4     3
## 57 12     RL   T4     4
## 58 13     RL   T4     6
## 59 14     RL   T4     4
## 60 15     RL   T4     3

RESUMEN ESTADISTICO

# Mean and standard deviation of score at each time point.
df %>%
  group_by(time) %>%
  get_summary_stats(score, type = "mean_sd")

## # A tibble: 4 x 5
##   time  variable     n  mean    sd
##   <fct> <chr>    <dbl> <dbl> <dbl>
## 1 T1    score       15  8.13  0.99
## 2 T2    score       15  6.4   1.24
## 3 T3    score       15  5.2   1.42
## 4 T4    score       15  5.2   1.70

Visualización de los datos

# Box plot of score by time; stored in bxp so it can be reused later.
bxp <- ggboxplot(df, x = "time", y = "score")
bxp

SUPUESTOS DEL MODELO

#Outliers

# Look for outlying scores within each time point.
df %>%
  group_by(time) %>%
  identify_outliers(score)

## [1] time       ID         Memory     score      is.outlier is.extreme
## <0 rows> (or 0-length row.names)

Normalidad

# Shapiro-Wilk normality test of score within each time point.
df %>%
  group_by(time) %>%
  shapiro_test(score)

## # A tibble: 4 x 4
##   time  variable statistic      p
##   <fct> <chr>        <dbl>  <dbl>
## 1 T1    score        0.847 0.0159
## 2 T2    score        0.896 0.0823
## 3 T3    score        0.929 0.267 
## 4 T4    score        0.919 0.183

# Q-Q plot of score, one panel per time point.
ggqqplot(df, "score", facet.by = "time")

Cálculo del ANOVA

# Repeated-measures ANOVA with time as the only factor (within-subject, ID as
# the subject identifier).
# NOTE(review): the Memory column (levels R/RA/RL) present in df is not part of
# this model, although the section heading describes a two-factor design.
# Confirm whether `between = Memory` was intended here.
res.aov<-anova_test(data=df,dv=score,wid = ID,within = time)
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##   Effect  DFn   DFd      F        p p<.05   ges
## 1   time 2.09 29.31 62.495 1.79e-11     * 0.454

Comparaciones de medias

# Paired t-tests between every pair of time points, Bonferroni-adjusted.
pwc <- df %>%
  pairwise_t_test(
    score ~ time,
    paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc

## # A tibble: 6 x 10
##   .y.   group1 group2    n1    n2 statistic    df            p   p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>        <dbl>   <dbl> <chr>       
## 1 score T1     T2        15    15      9.54    14 0.000000167  1   e-6 ****        
## 2 score T1     T3        15    15     11       14 0.0000000283 1.7 e-7 ****        
## 3 score T1     T4        15    15      9.29    14 0.000000231  1.39e-6 ****        
## 4 score T2     T3        15    15      8.29    14 0.000000902  5.41e-6 ****        
## 5 score T2     T4        15    15      4.29    14 0.000742     4   e-3 **          
## 6 score T3     T4        15    15      0       14 1            1   e+0 ns

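Todas las p adj. son menores que alpha = 0.05, excepto la de la comparación T3-T4 (p.adj = 1); por tanto, las medias difieren significativamente en todos los pares de tiempos salvo T3-T4.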

REPORTE

# Add plotting coordinates to the pairwise results, using time as the x axis.
pwc <- add_xy_position(pwc, x = "time")
# Box plot annotated with the pairwise p-values; the ANOVA result goes in the
# subtitle and the pairwise-comparison description in the caption.
bxp +
  stat_pvalue_manual(pwc) +
  labs(
    subtitle = get_test_label(res.aov, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

CONCLUSION

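El reporte confirma que el factor tiempo tiene un efecto significativo sobre el puntaje, F(2.09, 29.31) = 62.495, p = 1.79e-11: las medias difieren entre todos los pares de tiempos excepto T3-T4 (p.adj = 1). No se detectaron datos anómalos; la normalidad se cumple en T2, T3 y T4 (p > 0.05), pero no en T1 (p = 0.0159), por lo que los resultados deben interpretarse con cautela.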

PROBLEMA 4

ANOVA de 1 via Medidas repetidas

# Interactive helper: opens a file-selection dialog and prints the chosen path
# (the returned path is not stored in a variable).
file.choose()

## [1] "C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 4.csv"

# Load the Problem 4 data from the CSV file at the path printed above.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
df<-read.csv("C:\\Users\\usuario\\OneDrive\\Escritorio\\Diseño Experimental Dr Pedro Gonzalez\\Taller ANOVA problema 4.csv")
# Print the loaded data frame.
df

##    ID  T1  T2  T3  T4
## 1   1 108  96 110 122
## 2   2 103 117 127 133
## 3   3  96 107 106 107
## 4   4  84  85  92  99
## 5   5 118 125 125 116
## 6   6 110 107  96  91
## 7   7 129 128 123 128
## 8   8  90  84 101 113
## 9   9  84 104 100  88
## 10 10  96 100 103 105
## 11 11 105 114 105 112
## 12 12 113 117 132 130

library(tidyverse)
library(ggpubr)
library(rstatix)

# Reshape from wide to long: columns T1..T4 are stacked into a "time" key
# column and a "score" value column; ID and time are then converted to factors.
df <- df %>%
  gather(key = "time", value = "score", T1, T2, T3, T4) %>%
  convert_as_factor(ID, time)
# Print the long-format data.
df

##    ID time score
## 1   1   T1   108
## 2   2   T1   103
## 3   3   T1    96
## 4   4   T1    84
## 5   5   T1   118
## 6   6   T1   110
## 7   7   T1   129
## 8   8   T1    90
## 9   9   T1    84
## 10 10   T1    96
## 11 11   T1   105
## 12 12   T1   113
## 13  1   T2    96
## 14  2   T2   117
## 15  3   T2   107
## 16  4   T2    85
## 17  5   T2   125
## 18  6   T2   107
## 19  7   T2   128
## 20  8   T2    84
## 21  9   T2   104
## 22 10   T2   100
## 23 11   T2   114
## 24 12   T2   117
## 25  1   T3   110
## 26  2   T3   127
## 27  3   T3   106
## 28  4   T3    92
## 29  5   T3   125
## 30  6   T3    96
## 31  7   T3   123
## 32  8   T3   101
## 33  9   T3   100
## 34 10   T3   103
## 35 11   T3   105
## 36 12   T3   132
## 37  1   T4   122
## 38  2   T4   133
## 39  3   T4   107
## 40  4   T4    99
## 41  5   T4   116
## 42  6   T4    91
## 43  7   T4   128
## 44  8   T4   113
## 45  9   T4    88
## 46 10   T4   105
## 47 11   T4   112
## 48 12   T4   130

RESUMEN ESTADISTICO

# Mean and standard deviation of score at each time point.
df %>%
  group_by(time) %>%
  get_summary_stats(score, type = "mean_sd")

## # A tibble: 4 x 5
##   time  variable     n  mean    sd
##   <fct> <chr>    <dbl> <dbl> <dbl>
## 1 T1    score       12   103  13.7
## 2 T2    score       12   107  14.2
## 3 T3    score       12   110  13.3
## 4 T4    score       12   112  14.8

Visualización de los datos

# Box plot of score by time; stored in bxp so it can be reused later.
bxp <- ggboxplot(df, x = "time", y = "score")
bxp

SUPUESTOS DEL MODELO

#Outliers

# Look for outlying scores within each time point.
df %>%
  group_by(time) %>%
  identify_outliers(score)

## [1] time       ID         score      is.outlier is.extreme
## <0 rows> (or 0-length row.names)

Normalidad

# Shapiro-Wilk normality test of score within each time point.
df %>%
  group_by(time) %>%
  shapiro_test(score)

## # A tibble: 4 x 4
##   time  variable statistic     p
##   <fct> <chr>        <dbl> <dbl>
## 1 T1    score        0.968 0.883
## 2 T2    score        0.957 0.736
## 3 T3    score        0.911 0.222
## 4 T4    score        0.960 0.778

# Q-Q plot of score, one panel per time point.
ggqqplot(df, "score", facet.by = "time")

Cálculo del ANOVA

# One-way repeated-measures ANOVA: score is the dependent variable, ID the
# subject identifier and time the within-subject factor.
res.aov <- anova_test(data = df, dv = score, wid = ID, within = time)
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##   Effect  DFn   DFd     F     p p<.05  ges
## 1   time 1.83 20.11 3.027 0.075       0.06

Comparaciones de medias

# Paired t-tests between every pair of time points, Bonferroni-adjusted.
pwc <- df %>%
  pairwise_t_test(
    score ~ time,
    paired = TRUE,
    p.adjust.method = "bonferroni"
  )
pwc

## # A tibble: 6 x 10
##   .y.   group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 score T1     T2        12    12    -1.55     11 0.149 0.894 ns          
## 2 score T1     T3        12    12    -2.30     11 0.042 0.253 ns          
## 3 score T1     T4        12    12    -2.44     11 0.033 0.198 ns          
## 4 score T2     T3        12    12    -1.09     11 0.3   1     ns          
## 5 score T2     T4        12    12    -1.16     11 0.271 1     ns          
## 6 score T3     T4        12    12    -0.896    11 0.39  1     ns

Todas las p adj. son mayores de 0.05; por tanto, no hay diferencias significativas entre las medias de los tiempos, en concordancia con el ANOVA (p = 0.075).

REPORTE

# Add plotting coordinates to the pairwise results, using time as the x axis.
pwc <- add_xy_position(pwc, x = "time")
# Box plot annotated with the pairwise p-values; the ANOVA result goes in the
# subtitle and the pairwise-comparison description in the caption.
bxp +
  stat_pvalue_manual(pwc) +
  labs(
    subtitle = get_test_label(res.aov, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )

Taller ANOVA de 1, 2 y 3 vias

Angélica Reyes

10/20/2021

PROBLEMA 1

ANOVA de 1 via Medidas repetidas

RESUMEN ESTADISTICO

Visualización de los datos

SUPUESTOS DEL MODELO

Normalidad

Cálculo del ANOVA

Comparaciones de medias

REPORTE

PROBLEMA 2

RESUMEN ESTADISTICO

Visualización de los datos

SUPUESTOS DEL MODELO

Normalidad

ANOVA

Pruebas Post-Hoc

PROBLEMA 3

ANOVA de 3 vias (modelo de dos factores, con medidas repetidas de un factor)

RESUMEN ESTADISTICO

Visualización de los datos

SUPUESTOS DEL MODELO

Normalidad

Cálculo del ANOVA

Comparaciones de medias

REPORTE

CONCLUSION

PROBLEMA 4

ANOVA de 1 via Medidas repetidas

RESUMEN ESTADISTICO

Visualización de los datos

SUPUESTOS DEL MODELO

Normalidad

Cálculo del ANOVA

Comparaciones de medias

REPORTE