library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(datarium)
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
# Wide format
set.seed(123)
data("selfesteem2", package = "datarium")
selfesteem2 %>% sample_n_by(treatment, size = 1)
## # A tibble: 2 x 5
## id treatment t1 t2 t3
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 3 ctr 93 92 89
## 2 3 Diet 91 91 92
selfesteem2
## # A tibble: 24 x 5
## id treatment t1 t2 t3
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1 ctr 83 77 69
## 2 2 ctr 97 95 88
## 3 3 ctr 93 92 89
## 4 4 ctr 92 92 89
## 5 5 ctr 77 73 68
## 6 6 ctr 72 65 63
## 7 7 ctr 92 89 79
## 8 8 ctr 92 87 81
## 9 9 ctr 95 91 84
## 10 10 ctr 92 84 81
## # ... with 14 more rows
df=data_frame(selfesteem2); df
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
## # A tibble: 24 x 5
## id treatment t1 t2 t3
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1 ctr 83 77 69
## 2 2 ctr 97 95 88
## 3 3 ctr 93 92 89
## 4 4 ctr 92 92 89
## 5 5 ctr 77 73 68
## 6 6 ctr 72 65 63
## 7 7 ctr 92 89 79
## 8 8 ctr 92 87 81
## 9 9 ctr 95 91 84
## 10 10 ctr 92 84 81
## # ... with 14 more rows
View(df)
# Gather the columns t1, t2 and t3 into long format.
# Convert id and time into factor variables
selfesteem2 <- selfesteem2 %>%
gather(key = "time", value = "score", t1, t2, t3) %>%
convert_as_factor(id, time)
# Inspect some random rows of the data by groups
set.seed(123)
selfesteem2 %>% sample_n_by(treatment, time, size = 1)
## # A tibble: 6 x 4
## id treatment time score
## <fct> <fct> <fct> <dbl>
## 1 3 ctr t1 93
## 2 3 ctr t2 92
## 3 10 ctr t3 81
## 4 2 Diet t1 100
## 5 6 Diet t2 75
## 6 11 Diet t3 91
df2<-data.frame(selfesteem2); df2
## id treatment time score
## 1 1 ctr t1 83
## 2 2 ctr t1 97
## 3 3 ctr t1 93
## 4 4 ctr t1 92
## 5 5 ctr t1 77
## 6 6 ctr t1 72
## 7 7 ctr t1 92
## 8 8 ctr t1 92
## 9 9 ctr t1 95
## 10 10 ctr t1 92
## 11 11 ctr t1 92
## 12 12 ctr t1 79
## 13 1 Diet t1 84
## 14 2 Diet t1 100
## 15 3 Diet t1 91
## 16 4 Diet t1 91
## 17 5 Diet t1 74
## 18 6 Diet t1 76
## 19 7 Diet t1 90
## 20 8 Diet t1 89
## 21 9 Diet t1 93
## 22 10 Diet t1 90
## 23 11 Diet t1 93
## 24 12 Diet t1 80
## 25 1 ctr t2 77
## 26 2 ctr t2 95
## 27 3 ctr t2 92
## 28 4 ctr t2 92
## 29 5 ctr t2 73
## 30 6 ctr t2 65
## 31 7 ctr t2 89
## 32 8 ctr t2 87
## 33 9 ctr t2 91
## 34 10 ctr t2 84
## 35 11 ctr t2 92
## 36 12 ctr t2 69
## 37 1 Diet t2 86
## 38 2 Diet t2 99
## 39 3 Diet t2 91
## 40 4 Diet t2 92
## 41 5 Diet t2 76
## 42 6 Diet t2 75
## 43 7 Diet t2 87
## 44 8 Diet t2 89
## 45 9 Diet t2 94
## 46 10 Diet t2 93
## 47 11 Diet t2 92
## 48 12 Diet t2 80
## 49 1 ctr t3 69
## 50 2 ctr t3 88
## 51 3 ctr t3 89
## 52 4 ctr t3 89
## 53 5 ctr t3 68
## 54 6 ctr t3 63
## 55 7 ctr t3 79
## 56 8 ctr t3 81
## 57 9 ctr t3 84
## 58 10 ctr t3 81
## 59 11 ctr t3 91
## 60 12 ctr t3 62
## 61 1 Diet t3 88
## 62 2 Diet t3 97
## 63 3 Diet t3 92
## 64 4 Diet t3 95
## 65 5 Diet t3 72
## 66 6 Diet t3 76
## 67 7 Diet t3 87
## 68 8 Diet t3 88
## 69 9 Diet t3 93
## 70 10 Diet t3 95
## 71 11 Diet t3 91
## 72 12 Diet t3 78
View(df2)
Group the data by treatment and time, and then compute some summary statistics of the score variable: mean and sd (standard deviation).
selfesteem2 %>%
group_by(treatment, time) %>%
get_summary_stats(score, type = "mean_sd")
## # A tibble: 6 x 6
## treatment time variable n mean sd
## <fct> <fct> <chr> <dbl> <dbl> <dbl>
## 1 ctr t1 score 12 88 8.08
## 2 ctr t2 score 12 83.8 10.2
## 3 ctr t3 score 12 78.7 10.5
## 4 Diet t1 score 12 87.6 7.62
## 5 Diet t2 score 12 87.8 7.42
## 6 Diet t3 score 12 87.7 8.14
Create box plots of the score colored by treatment groups:
library(ggpubr)
## Loading required package: ggplot2
bxp <- ggboxplot(
selfesteem2, x = "time", y = "score",
color = "treatment", palette = "jco"
)
bxp
# Chek assumptions
#Revision de Datos atípicos en la variable respuesta
selfesteem2 %>%
group_by(treatment, time) %>%
identify_outliers(score)
## [1] treatment time id score is.outlier is.extreme
## <0 rows> (or 0-length row.names)
selfesteem2 %>%
group_by(treatment, time) %>%
shapiro_test(score)
## # A tibble: 6 x 5
## treatment time variable statistic p
## <fct> <fct> <chr> <dbl> <dbl>
## 1 ctr t1 score 0.828 0.0200
## 2 ctr t2 score 0.868 0.0618
## 3 ctr t3 score 0.887 0.107
## 4 Diet t1 score 0.919 0.279
## 5 Diet t2 score 0.923 0.316
## 6 Diet t3 score 0.886 0.104
res.aov <- anova_test(
data = selfesteem2, dv = score, wid = id,
within = c(treatment, time)
)
get_anova_table(res.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 treatment 1.00 11.00 15.541 2.00e-03 * 0.059
## 2 time 1.31 14.37 27.369 5.03e-05 * 0.049
## 3 treatment:time 2.00 22.00 30.424 4.63e-07 * 0.050
Interaccion presente, los otros p-valores no se pueden interpretar
Effect of treatment. In our example, we’ll analyze the effect of treatment on self-esteem score at every time point.
# Effect of treatment at each time point
one.way <- selfesteem2 %>%
group_by(time) %>%
anova_test(dv = score, wid = id, within = treatment) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni")
one.way
## # A tibble: 3 x 9
## time Effect DFn DFd F p `p<.05` ges p.adj
## * <fct> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 t1 treatment 1 11 0.376 0.552 "" 0.000767 1
## 2 t2 treatment 1 11 9.03 0.012 "*" 0.052 0.036
## 3 t3 treatment 1 11 30.9 0.00017 "*" 0.199 0.00051