knitr::opts_chunk$set(echo = TRUE)
options(digits = 3)
## Load the libraries we will be using
pacman::p_load(tidyverse, Rfast, heplots,
GGally, MVN, car, rstatix)
## Changing the default theme to black white
theme_set(theme_bw())
theme_update(
axis.title = element_text(size = 10),
axis.text = element_text(size = 10),
plot.title = element_text(size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
plot.caption = element_text(size = 8)
)
# Reading in the data:
turkey <- readxl::read_excel("turkey.xlsx")
# Let's change Trt to a factor and adding the name of the level
# and change Day to a factor as well
turkey <-
turkey |>
rename(Salt = Trt) |>
mutate(
Salt = factor(
Salt,
labels = c("Control", "STP3", "STP5",
"SASMP3", "SASMP5")),
Day = as_factor(Day)
)
# skim creates the basic summary stats for each variable
skimr::skim(turkey)
Name | turkey |
Number of rows | 25 |
Number of columns | 7 |
_______________________ | |
Column type frequency: | |
factor | 2 |
numeric | 5 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Day | 0 | 1 | FALSE | 5 | 1: 5, 2: 5, 3: 5, 4: 5 |
Salt | 0 | 1 | FALSE | 5 | Con: 5, STP: 5, STP: 5, SAS: 5 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Cooking_Loss | 0 | 1 | 34.20 | 2.86 | 26.80 | 33.00 | 34.20 | 36.20 | 38.50 | ▂▁▇▅▇ |
Ph | 0 | 1 | 6.61 | 0.23 | 6.20 | 6.49 | 6.64 | 6.72 | 7.34 | ▃▇▇▁▁ |
Moist | 0 | 1 | 60.22 | 1.67 | 56.25 | 59.57 | 60.40 | 61.37 | 63.37 | ▂▁▆▇▁ |
Fat | 0 | 1 | 10.00 | 0.62 | 9.18 | 9.52 | 10.00 | 10.18 | 11.72 | ▆▇▃▁▂ |
Hex | 0 | 1 | 0.77 | 0.43 | 0.20 | 0.40 | 0.73 | 1.00 | 1.58 | ▇▇▆▂▅ |
Similar to our preliminary analysis from the previous example, but now let’s also count the number of blocks as b!
# Total sample size (N)
N <- nrow(turkey)
# number of treatments (k)
k <- n_distinct(turkey$Salt)
# Calculating the number of blocks: b
b <- n_distinct(turkey$Day)
# Number of response variables (p)
p <-
turkey |>
dplyr::select(Cooking_Loss:Hex) |>
ncol()
c("N" = N, "groups" = k, "blocks" = b, "variables" = p)
## N groups blocks variables
## 25 5 5 5
Since the design is balance, the individual sample size is N/k
n_groups <-
turkey |>
count(Salt) |>
dplyr::select(n) |>
unlist()
n_groups
## n1 n2 n3 n4 n5
## 5 5 5 5 5
Calculating the overall mean of each variable:
overallmean <-
turkey |>
dplyr::select(where(is.numeric)) |>
colMeans()
overallmean
## Cooking_Loss Ph Moist Fat Hex
## 34.200 6.610 60.216 10.004 0.773
Finding the mean vector for each salt type (i): \(\bar{\mathbf{y}}_{i\bullet}\)
salt_means <-
turkey |>
summarize(
.by = Salt,
across(.cols = where(is.numeric),
.fns = mean)
)
salt_means
## # A tibble: 5 × 6
## Salt Cooking_Loss Ph Moist Fat Hex
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 37.1 6.53 58.2 10.4 1.06
## 2 STP3 34.3 6.58 60.7 9.81 0.722
## 3 STP5 30.7 6.68 61.8 9.69 0.308
## 4 SASMP3 34.9 6.55 60.2 10.3 1.18
## 5 SASMP5 34 6.71 60.2 9.87 0.596
Finding the mean vector for each block (j, Day): \(\bar{\mathbf{y}}_{\bullet j}\)
day_means <-
turkey |>
summarize(
.by = Day,
across(.cols = where(is.numeric),
.fns = mean)
)
day_means
## # A tibble: 5 × 6
## Day Cooking_Loss Ph Moist Fat Hex
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.9 6.41 60.0 10.4 1.09
## 2 2 32.3 6.67 61.0 9.43 0.554
## 3 3 32.9 6.61 59.5 10.3 0.468
## 4 4 35.9 6.76 60.7 10.1 0.966
## 5 5 36.0 6.60 59.9 9.91 0.79
#### Fitting a CRD MANOVA model in R
turkey_CRD <-
manova(
cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt,
data = turkey
)
turkey_CRD
## Call:
## manova(cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt, data = turkey)
##
## Terms:
## Salt Residuals
## Cooking_Loss 106.7 90.2
## Ph 0.1 1.1
## Moist 34.9 32.2
## Fat 1.8 7.3
## Hex 2.5 2.0
## Deg. of Freedom 4 20
##
## Residual standard errors: 2.12 0.237 1.27 0.604 0.314
## Estimated effects may be unbalanced
Adding the effect of day to make a RCBD
turkey_RCBD <-
manova(
cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt + Day,
data = turkey
)
turkey_RCBD
## Call:
## manova(cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt + Day,
## data = turkey)
##
## Terms:
## Salt Day Residuals
## Cooking_Loss 106.7 57.5 32.6
## Ph 0.1 0.3 0.8
## Moist 34.9 7.2 25.0
## Fat 1.8 2.7 4.6
## Hex 2.5 1.4 0.6
## Deg. of Freedom 4 4 16
##
## Residual standard errors: 1.43 0.222 1.25 0.537 0.191
## Estimated effects may be unbalanced
Notice that the day + residuals from the RCBD = residuals from CRD, while the effect of salt type remains unchanged!
summary(turkey_CRD)$SS$Salt
## Cooking_Loss Ph Moist Fat Hex
## Cooking_Loss 106.72 -2.856 -57.93 11.691 13.865
## Ph -2.86 0.138 1.40 -0.397 -0.501
## Moist -57.93 1.397 34.85 -6.565 -6.812
## Fat 11.69 -0.397 -6.57 1.849 2.018
## Hex 13.87 -0.501 -6.81 2.018 2.488
summary(turkey_RCBD)$SS$Salt
## Cooking_Loss Ph Moist Fat Hex
## Cooking_Loss 106.72 -2.856 -57.93 11.691 13.865
## Ph -2.86 0.138 1.40 -0.397 -0.501
## Moist -57.93 1.397 34.85 -6.565 -6.812
## Fat 11.69 -0.397 -6.57 1.849 2.018
## Hex 13.87 -0.501 -6.81 2.018 2.488
summary(turkey_CRD)$SS$Residuals
## Cooking_Loss Ph Moist Fat Hex
## Cooking_Loss 90.156 -0.2628 -11.80 0.34 3.6296
## Ph -0.263 1.1217 1.64 -0.27 -0.0348
## Moist -11.802 1.6350 32.19 -4.61 1.9382
## Fat 0.340 -0.2699 -4.61 7.29 1.2521
## Hex 3.630 -0.0348 1.94 1.25 1.9677
summary(turkey_RCBD)$SS$Residuals + summary(turkey_RCBD)$SS$Day
## Cooking_Loss Ph Moist Fat Hex
## Cooking_Loss 90.156 -0.2628 -11.80 0.34 3.6296
## Ph -0.263 1.1217 1.64 -0.27 -0.0348
## Moist -11.802 1.6350 32.19 -4.61 1.9382
## Fat 0.340 -0.2699 -4.61 7.29 1.2521
## Hex 3.630 -0.0348 1.94 1.25 1.9677
By accounting for the effect of Day, we don’t change the performance of the Salts, but we do remove some of the unexplained differences remaining in our error terms!
Manova(
turkey_RCBD,
test = "Wilks",
type = 3
)
##
## Type III MANOVA Tests: Wilks test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.0008 2967 5 12.0 < 2e-16 ***
## Salt 4 0.0365 3 20 40.7 0.00035 ***
## Day 4 0.0355 4 20 40.7 0.00031 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Manova(
turkey_CRD,
test = "Wilks",
type = 3
)
##
## Type III MANOVA Tests: Wilks test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.0007 4652 5 16 <2e-16 ***
## Salt 4 0.1715 2 20 54 0.033 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Checking the results using Pillai’s statistic
Manova(
turkey_RCBD,
test = "Pillai",
type = 3
)
##
## Type III MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.999 2967 5 12 <2e-16 ***
## Salt 4 1.433 2 20 60 0.0644 .
## Day 4 1.900 3 20 60 0.0015 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Manova(
turkey_CRD,
test = "Pillai",
type = 3
)
##
## Type III MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## (Intercept) 1 0.999 4652 5 16 <2e-16 ***
## Salt 4 1.088 1 20 76 0.14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
By accounting for the day, the effect of salt became more evident!
Note that the salt effect isn’t stronger, but we removed some of the noise to make it easier to notice a changed caused by the salt types.
res_RCBD <-
turkey_RCBD |>
pluck(residuals) |>
data.frame() |>
add_column(Salt = turkey$Salt)
res_RCBD
## Cooking_Loss Ph Moist Fat Hex Salt
## 1 -0.58 -0.1256 -0.9224 1.0204 -0.0492 Control
## 2 2.20 -0.0416 -0.4504 -0.1556 0.1448 STP3
## 3 1.14 0.0364 0.3376 -0.1856 -0.1212 STP5
## 4 -1.58 0.1424 0.0096 -0.3376 0.0548 SASMP3
## 5 -1.18 -0.0116 1.0256 -0.3416 -0.0292 SASMP5
## 6 -0.38 -0.0996 2.4496 -0.4216 0.1628 Control
## 7 0.60 -0.0256 -1.4584 0.0924 -0.1032 STP3
## 8 -1.96 -0.0176 0.7596 0.9724 0.2108 STP5
## 9 1.02 0.0584 -1.1484 -0.5296 -0.2132 SASMP3
## 10 0.72 0.0844 -0.6024 -0.1136 -0.0572 SASMP5
## 11 1.66 -0.3296 -0.4844 -0.6816 -0.1712 Control
## 12 0.14 0.0844 0.8976 0.1224 0.0628 STP3
## 13 -1.62 0.0924 0.7656 -0.5576 0.1968 STP5
## 14 0.56 0.0884 -0.1624 0.7604 -0.1472 SASMP3
## 15 -0.74 0.0644 -1.0164 0.3564 0.0588 SASMP5
## 16 -0.32 0.6664 0.5896 0.1604 0.2308 Control
## 17 -1.04 -0.1196 -0.0084 0.1744 -0.0152 STP3
## 18 1.40 -0.1816 -1.9204 -0.2356 -0.1812 STP5
## 19 -0.82 -0.2256 0.4116 0.1424 0.2048 SASMP3
## 20 0.78 -0.1396 0.9276 -0.2416 -0.2392 SASMP5
## 21 -0.38 -0.1116 -1.6324 -0.0776 -0.1732 Control
## 22 -1.90 0.1024 1.0196 -0.2336 -0.0892 STP3
## 23 1.04 0.0704 0.0576 0.0064 -0.1052 STP5
## 24 0.82 -0.0636 0.8896 -0.0356 0.1008 SASMP3
## 25 0.42 0.0024 -0.3344 0.3404 0.2668 SASMP5
#### MVN for the residuals #####
mvn(
res_RCBD |> select(-Salt),
mvnTest="mardia",
multivariatePlot = "qq",
desc = F,
univariateTest = "SW"
)
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 30.9206335425045 0.66543282841103 YES
## 2 Mardia Kurtosis -0.538068348354928 0.590529863133813 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk Cooking_Loss 0.965 0.5120 YES
## 2 Shapiro-Wilk Ph 0.829 0.0007 NO
## 3 Shapiro-Wilk Moist 0.970 0.6507 YES
## 4 Shapiro-Wilk Fat 0.930 0.0853 YES
## 5 Shapiro-Wilk Hex 0.939 0.1407 YES
Small sample, but we still reject that Ph is univariate Normal, so there is evidence that the complete data isn’t MVN
Can’t check equal covariance assumption since b = p
Using Levene’s test to check for equal variance across salts with the day effect removed: using the residuals from just the day model.
res_RCBD |>
pivot_longer(
cols = Cooking_Loss:Hex,
names_to = "variable",
values_to = "residual"
) |>
group_by(variable) |>
levene_test(residual ~ Salt) |>
data.frame() |>
arrange(p)
## variable df1 df2 statistic p
## 1 Fat 4 20 0.657 0.629
## 2 Ph 4 20 0.646 0.636
## 3 Cooking_Loss 4 20 0.501 0.735
## 4 Moist 4 20 0.409 0.800
## 5 Hex 4 20 0.273 0.892
Although we can’t compare the covariance matrices across salt types, the individual test for homogeneity didn’t show a difference in variances
After accounting for the differences across Days, we have the same issue that we had for our Completely Randomized Design analysis.
We can use anova_test()
in rstatix to quickly apply an
ANOVA test to each variable.
We need to put in “long” format first.
We can turn the dataframe into a long format using
pivot_longer
!
Just need to specify cols = response variable columns and what we want to rename the actual values column and variable identifier column to be
turkey |>
pivot_longer(
cols = Cooking_Loss:Hex,
names_to = "variable",
values_to = "measure"
) |>
group_by(variable) |>
anova_test(measure ~ Day + Salt) |>
data.frame() |>
filter(Effect == "Salt") |>
arrange(p)
## variable Effect DFn DFd F p p..05 ges
## 1 Hex Salt 4 16 17.003 1.29e-05 * 0.810
## 2 Cooking_Loss Salt 4 16 13.079 6.45e-05 * 0.766
## 3 Moist Salt 4 16 5.583 5.00e-03 * 0.583
## 4 Fat Salt 4 16 1.601 2.22e-01 0.286
## 5 Ph Salt 4 16 0.697 6.05e-01 0.148
The p-values for Hex, Cooking_Loss, and Moisture are statistically significant
We can find which groups are significantly different across the
significant variables using tukey_hsd()
in the rstatix
package
turkey_tukey <-
turkey |>
dplyr::select(-Ph, - Fat) |>
pivot_longer(
cols = Cooking_Loss:Hex,
names_to = "variable",
values_to = "measure"
) |>
group_by(variable) |>
tukey_hsd(
measure ~ Day + Salt,
which = "Salt"
) |>
filter(p.adj < 0.01) |>
arrange(variable, p.adj)
turkey_tukey
## # A tibble: 7 × 10
## variable term group1 group2 null.value estimate conf.low conf.high p.adj
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Cooking_Lo… Salt Contr… STP5 0 -6.42 -9.19 -3.65 2.16e-5
## 2 Cooking_Lo… Salt STP5 SASMP3 0 4.22 1.45 6.99 2.05e-3
## 3 Cooking_Lo… Salt STP3 STP5 0 -3.64 -6.41 -0.873 7.42e-3
## 4 Hex Salt STP5 SASMP3 0 0.874 0.503 1.24 1.76e-5
## 5 Hex Salt Contr… STP5 0 -0.748 -1.12 -0.377 1.12e-4
## 6 Hex Salt SASMP3 SASMP5 0 -0.586 -0.957 -0.215 1.45e-3
## 7 Moist Salt Contr… STP5 0 3.66 1.24 6.08 2.21e-3
## # ℹ 1 more variable: p.adj.signif <chr>