Clase DE 02/06

#DISEÑO EN MEDIDAS REPETIDAS #Primeros diseños en los que el tiempo juega un papel en el modelado; los anteriores no tomaban en cuenta esta variable.

#Factor - intrasujetos = Tiempo #Factor - entresujetos = FSCA, FSBA, FCCA, FCBA. #Medidas repetidas en: una, dos y tres vías.

#MEDIDAS REPETIDAS DE UNA VÍA Antes llamado #PREPARACIÓN DE LOS DATOS #UNA VÍA: Cuando hay un solo factor y solo está el TIEMPO #id y tiempo: se convierten a factor, no se tratan como número

#De formato largo a ancho

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
# install.packages("datarium")
# Load the example data set shipped with the datarium package.
data("selfesteem", package = "datarium")

# Reshape from wide (one column per time point: t1, t2, t3) to long format
# (one row per subject and time point). `id` and `tiempo` are stored as
# factors so that later steps treat them as categorical, not numeric.
# pivot_longer() emits rows subject by subject, so the result is sorted by
# time point and then subject to keep the row order gather() used to produce.
datos <- selfesteem %>%
  pivot_longer(
    cols = c(t1, t2, t3),
    names_to = "tiempo",
    values_to = "rto"
  ) %>%
  mutate(across(c(id, tiempo), as.factor)) %>%
  arrange(tiempo, id)

# Quick look at the response at each time point. Calling boxplot() on the
# whole long data frame would also draw boxes for the integer codes of the
# `id` and `tiempo` factors, which carry no information.
boxplot(rto ~ tiempo, data = datos)

#Una vía = cuando hay un solo factor y es el tiempo. No hay tratamiento. No hay bloque. Una sola respuesta. #Cortes en diferentes tiempos (tiempos equidistantes): t1 = 30 días después de la siembra. t2 = 60 días después de la siembra. t3 = 90 días después de la siembra.

# Descriptive statistics of the response at each time point: mean, standard
# deviation, number of observations and coefficient of variation (in %).
summarise(
  group_by(datos, tiempo),
  media = mean(rto),
  desv = sd(rto),
  n = n(),
  cv = 100 * desv / media
)

## # A tibble: 3 × 5
##   tiempo media  desv     n    cv
##   <fct>  <dbl> <dbl> <int> <dbl>
## 1 t1      3.14 0.552    10  17.6
## 2 t2      4.93 0.863    10  17.5
## 3 t3      7.64 1.14     10  15.0

#NOTA: Coeficientes de variación <20% se asocian a datos con comportamiento normal, es decir, es bueno. (POCA VARIABILIDAD)
#Si son superiores a 20% se observa heterogeneidad.

# Box plot of the response at each time point. Using a formula with `data =`
# makes the axis labels read `tiempo` and `rto` instead of `datos$tiempo` and
# `datos$rto`.
boxplot(rto ~ tiempo, data = datos)

#DETECCIÓN OUTLIER
 
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

# Flag outlying values of the response separately within each time point.
identify_outliers(group_by(datos, tiempo), rto)

## # A tibble: 2 × 5
##   tiempo id      rto is.outlier is.extreme
##   <fct>  <fct> <dbl> <lgl>      <lgl>     
## 1 t1     6      2.05 TRUE       FALSE     
## 2 t2     2      6.91 TRUE       FALSE

#PROBANDO NORMALIDAD
# Shapiro-Wilk normality test of the response, run once per time point.
shapiro_test(group_by(datos, tiempo), rto)

## # A tibble: 3 × 4
##   tiempo variable statistic     p
##   <fct>  <chr>        <dbl> <dbl>
## 1 t1     rto          0.967 0.859
## 2 t2     rto          0.876 0.117
## 3 t3     rto          0.923 0.380

# Aparecen p-valores, se le hace prueba de normalidad a cada tiempo (TODOS LOS DATOS EN CADA TIEMPO SON NORMALES)

#SUPUESTO DE ESFERICIDAD (igualdad de las varianzas de las diferencias entre cada par de tiempos) #Compara las varianzas de las diferencias entre tiempos
# One-way repeated-measures ANOVA: `rto` is the response, `id` identifies the
# subject and `tiempo` is the within-subject factor.
res.aov <- anova_test(
  data = datos,
  dv = rto,
  wid = id,
  within = tiempo
)

# Sphericity: show the Mauchly test stored in the fitted object.
res.aov[["Mauchly's Test for Sphericity"]]

##   Effect     W     p p<.05
## 1 tiempo 0.551 0.092

# Print the ANOVA table extracted from the fitted repeated-measures model.
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##   Effect DFn DFd      F        p p<.05   ges
## 1 tiempo   2  18 55.469 2.01e-08     * 0.829

#No hay problema de varianzas desiguales (Mauchly p = 0.092 > 0.05, se cumple la esfericidad). #SI LA PRUEBA DE MAUCHLY da significativa (no se cumple la esfericidad) se debe usar otra metodología: corregir los grados de libertad con Greenhouse-Geisser o Huynh-Feldt; get_anova_table() aplica la corrección de forma automática.
#ges (tamaño del efecto) #Se rechaza H0, es decir, el aceite que se produce en los tres tiempos es diferente. Es decir, el tiempo de corte tiene efecto en la abundancia de aceite. El corte tres da más aceite. #No se usa Tukey sino Bonferroni

#se usa el p-valor ajustado (p.adj)
# Paired t-tests between every pair of time points, with Bonferroni-adjusted
# p-values.
pairwise_t_test(
  datos,
  rto ~ tiempo,
  paired = TRUE,
  p.adjust.method = "bonferroni"
)

## # A tibble: 3 × 10
##   .y.   group1 group2    n1    n2 statistic    df           p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl> <dbl> <chr>       
## 1 rto   t1     t2        10    10     -4.97     9 0.000772     2e-3 **          
## 2 rto   t1     t3        10    10    -13.2      9 0.000000334  1e-6 ****        
## 3 rto   t2     t3        10    10     -4.87     9 0.000886     3e-3 **

## # A tibble: 3 × 10

# Fit the one-way repeated-measures ANOVA (response `rto`, subject `id`,
# within-subject factor `tiempo`); this repeats the model fitted earlier in
# the file.
res.aov <- anova_test(datos, dv = rto, wid = id, within = tiempo)

# Sphericity: show the Mauchly test stored in the fitted object.
res.aov[["Mauchly's Test for Sphericity"]]

##   Effect     W     p p<.05
## 1 tiempo 0.551 0.092

# Print the ANOVA table extracted from the fitted repeated-measures model.
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##   Effect DFn DFd      F        p p<.05   ges
## 1 tiempo   2  18 55.469 2.01e-08     * 0.829

#NOTA: El p-valor que se usa es el ajustado (todas las hipótesis nulas se rechazan). Los tiempos t1 y t2 son diferentes en cuanto a rendimiento.

#se usa el p-valor ajustado (p.adj)

# Pairwise paired t-tests across time points; the reported `p.adj` column
# holds the Bonferroni-adjusted p-values.
datos %>%
  pairwise_t_test(rto ~ tiempo, paired = TRUE, p.adjust.method = "bonferroni")

## # A tibble: 3 × 10
##   .y.   group1 group2    n1    n2 statistic    df           p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl> <dbl> <chr>       
## 1 rto   t1     t2        10    10     -4.97     9 0.000772     2e-3 **          
## 2 rto   t1     t3        10    10    -13.2      9 0.000000334  1e-6 ****        
## 3 rto   t2     t3        10    10     -4.87     9 0.000886     3e-3 **

#Resultado: Todos los tiempos son diferentes en rendimiento. El mejor es el t3. Los peores son el t1 y el t2.

#DOS VÍAS: Tiempo y un factor. Se aplicó un fertilizante. Interesa saber si la fertilización ayudó y si el tiempo influyó.

# Load the two-factor example data set shipped with the datarium package.
data("selfesteem2", package = "datarium")

datos2 <- selfesteem2

# Rename the two treatment levels (first level -> "con fert", second level ->
# "sin fert"). Relabelling through factor() ties each label to the treatment
# value itself instead of to row positions, so the labels stay correct even if
# the rows are reordered or filtered.
datos2$treatment <- factor(
  datos2$treatment,
  labels = c("con fert", "sin fert")
)

# Convert to long format: one row per subject, treatment and time point.
# pivot_longer() emits rows subject by subject, so the result is sorted to
# keep the row order gather() used to produce.
datos2 <- datos2 %>%
  pivot_longer(
    cols = c(t1, t2, t3),
    names_to = "tiempo",
    values_to = "rto"
  ) %>%
  arrange(tiempo, treatment, id)

#Resumen estadistico
# Descriptive statistics of the response for every treatment x time point
# cell: mean, standard deviation, cell size and coefficient of variation (%).
summarise(
  group_by(datos2, treatment, tiempo),
  media = mean(rto),
  desv = sd(rto),
  n = n(),
  cv = 100 * desv / media
)

## `summarise()` has grouped output by 'treatment'. You can override using the
## `.groups` argument.

## # A tibble: 6 × 6
## # Groups:   treatment [2]
##   treatment tiempo media  desv     n    cv
##   <fct>     <chr>  <dbl> <dbl> <int> <dbl>
## 1 con fert  t1      88    8.08    12  9.18
## 2 con fert  t2      83.8 10.2     12 12.2 
## 3 con fert  t3      78.7 10.5     12 13.4 
## 4 sin fert  t1      87.6  7.62    12  8.70
## 5 sin fert  t2      87.8  7.42    12  8.45
## 6 sin fert  t3      87.7  8.14    12  9.28

#Coeficientes de variación <20%: los datos son homogéneos. El que tiene mejor aceite es el que tiene la media más alta.

#Visualización
library(ggplot2)

# Box plots of the response at each time point, one box per treatment
# (distinguished by fill colour).
ggplot(datos2, aes(x = tiempo, y = rto, fill = treatment)) +
  geom_boxplot()

# Con fertilizante los t1 y t2 parecen iguales; cuanto más grande es la caja, mayor es la variabilidad. Lo que muestra el boxplot es que fertilizar o no fertilizar da lo mismo en el rendimiento.

# Análisis de outliers
# Flag outlying values of the response within each treatment x time point
# cell.
identify_outliers(group_by(datos2, treatment, tiempo), rto)

## [1] treatment  tiempo     id         rto        is.outlier is.extreme
## <0 rows> (or 0-length row.names)

#REVISIÓN SUPUESTOS
# Shapiro-Wilk normality test of the response, run once per treatment x time
# point cell.
shapiro_test(group_by(datos2, treatment, tiempo), rto)

## # A tibble: 6 × 5
##   treatment tiempo variable statistic      p
##   <fct>     <chr>  <chr>        <dbl>  <dbl>
## 1 con fert  t1     rto          0.828 0.0200
## 2 con fert  t2     rto          0.868 0.0618
## 3 con fert  t3     rto          0.887 0.107 
## 4 sin fert  t1     rto          0.919 0.279 
## 5 sin fert  t2     rto          0.923 0.316 
## 6 sin fert  t3     rto          0.886 0.104

#Se hace sobre los datos de aceite y no de los residuales.
#ANALISIS DE VARIANZA
# Two-way repeated-measures ANOVA: `rto` is the response, `id` identifies the
# subject, and both `treatment` and `tiempo` are declared as within-subject
# factors.
res.aov <- anova_test(
  datos2,
  dv = rto,
  wid = id,
  within = c(treatment, tiempo)
)

# Print the resulting ANOVA table.
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##             Effect  DFn   DFd      F        p p<.05   ges
## 1        treatment 1.00 11.00 15.541 2.00e-03     * 0.059
## 2           tiempo 1.31 14.37 27.369 5.03e-05     * 0.049
## 3 treatment:tiempo 2.00 22.00 30.424 4.63e-07     * 0.050

#Nota: Lo primero que se revisa es la interacción: si su p-valor es < 0.05 hay interacción y, por tanto, no se interpretan por separado ni tiempo ni tratamiento. NO SE DEBEN HACER COMPARACIONES DE EFECTOS PRINCIPALES CUANDO HAY INTERACCIÓN

#PROCEDIMIENTO CUANDO HAY INTERACCIÓN #GRAFICO DE INTERACCIÓN
# Interaction plot of the mean response per time point, one trace per
# treatment. with() lets the axis and legend labels read `tiempo`, `rto` and
# `treatment` instead of `datos2$tiempo`, `datos2$rto` and `datos2$treatment`.
with(datos2, interaction.plot(tiempo, treatment, rto))

# Mauchly's sphericity test stored in the fitted two-way model.
res.aov$`Mauchly's Test for Sphericity`

##             Effect     W     p p<.05
## 1           tiempo 0.469 0.023     *
## 2 treatment:tiempo 0.616 0.089

# Interaction plot drawn with ggplot2: mean response per time point and
# treatment, shown as large points joined by one line per treatment.
datos2 %>%
  group_by(tiempo, treatment) %>%
  summarise(mean_rto = mean(rto)) %>%
  ggplot(
    aes(
      x = tiempo,
      y = mean_rto,
      color = treatment,
      group = treatment
    )
  ) +
  geom_point(size = 5) +
  geom_line(linewidth = 3)

## `summarise()` has grouped output by 'tiempo'. You can override using the
## `.groups` argument.

##NOTA: SI LAS LÍNEAS SE CRUZAN HAY INTERACCIÓN. Se observa una diferencia en los momentos t2 y t3 con fertilizante y en el t1 no hay diferencias; por tanto, hay interacción. Si no hubiese interacción, las líneas serían paralelas.

Clase DE 02/06

Carlos S

2023-06-07