Chapter 6: RCBD Manova

Set Up Your Project and Load Libraries

knitr::opts_chunk$set(echo = TRUE) 
options(digits = 3)

## Load the libraries we will be using
pacman::p_load(tidyverse, Rfast, heplots, 
               GGally, MVN, car, rstatix)

## Changing the default theme to black white
theme_set(theme_bw())

theme_update(
  axis.title = element_text(size = 10),
  axis.text = element_text(size = 10),
  plot.title = element_text(size = 14, hjust = 0.5),
  plot.subtitle = element_text(size = 10, hjust = 0.5),
  plot.caption = element_text(size = 8)
)

# Reading in the data:
turkey <- readxl::read_excel("turkey.xlsx")

# Let's change Trt to a factor and adding the name of the level
# and change Day to a factor as well
turkey <- 
  turkey |> 
  rename(Salt = Trt) |> 
  mutate(
    Salt = factor(
      Salt, 
      labels = c("Control", "STP3", "STP5",
                 "SASMP3", "SASMP5")),
    Day = as_factor(Day)
  ) 


# skim creates the basic summary stats for each variable
skimr::skim(turkey)

Data summary
Name	turkey
Number of rows	25
Number of columns	7
_______________________
Column type frequency:
factor	2
numeric	5
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
Day	0	1	FALSE	5	1: 5, 2: 5, 3: 5, 4: 5
Salt	0	1	FALSE	5	Con: 5, STP: 5, STP: 5, SAS: 5

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Cooking_Loss	1	34.20	2.86	26.80	33.00	34.20	36.20	38.50	▂▁▇▅▇
Ph	1	6.61	0.23	6.20	6.49	6.64	6.72	7.34	▃▇▇▁▁
Moist	1	60.22	1.67	56.25	59.57	60.40	61.37	63.37	▂▁▆▇▁
Fat	1	10.00	0.62	9.18	9.52	10.00	10.18	11.72	▆▇▃▁▂
Hex	1	0.77	0.43	0.20	0.40	0.73	1.00	1.58	▇▇▆▂▅

Preliminary Analysis

Similar to our preliminary analysis from the previous example, but now let’s also count the number of blocks as b!

# Total sample size (N)
N <- nrow(turkey)

# number of treatments (k) 
k <- n_distinct(turkey$Salt)

# Calculating the number of blocks: b
b <- n_distinct(turkey$Day)

# Number of response variables (p)
p <- 
  turkey |> 
  dplyr::select(Cooking_Loss:Hex) |> 
  ncol()

c("N" = N, "groups" = k, "blocks" = b, "variables" = p)

##         N    groups    blocks variables 
##        25         5         5         5

Since the design is balance, the individual sample size is N/k

n_groups <- 
  turkey |> 
  count(Salt) |> 
  dplyr::select(n) |> 
  unlist()

n_groups

## n1 n2 n3 n4 n5 
##  5  5  5  5  5

Calculating the overall mean of each variable:

overallmean <- 
  turkey |> 
  dplyr::select(where(is.numeric)) |> 
  colMeans()

overallmean

## Cooking_Loss           Ph        Moist          Fat          Hex 
##       34.200        6.610       60.216       10.004        0.773

Finding the mean vector for each salt type (i): \(\bar{\mathbf{y}}_{i\bullet}\)

salt_means <-
  turkey |> 
  summarize(
    .by = Salt,
    across(.cols = where(is.numeric),
           .fns = mean)
  )

salt_means

## # A tibble: 5 × 6
##   Salt    Cooking_Loss    Ph Moist   Fat   Hex
##   <fct>          <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control         37.1  6.53  58.2 10.4  1.06 
## 2 STP3            34.3  6.58  60.7  9.81 0.722
## 3 STP5            30.7  6.68  61.8  9.69 0.308
## 4 SASMP3          34.9  6.55  60.2 10.3  1.18 
## 5 SASMP5          34    6.71  60.2  9.87 0.596

Finding the mean vector for each block (j, Day): \(\bar{\mathbf{y}}_{\bullet j}\)

day_means <-
  turkey |> 
  summarize(
    .by = Day,
    across(.cols = where(is.numeric),
           .fns = mean)
  )

day_means

## # A tibble: 5 × 6
##   Day   Cooking_Loss    Ph Moist   Fat   Hex
##   <fct>        <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1             33.9  6.41  60.0 10.4  1.09 
## 2 2             32.3  6.67  61.0  9.43 0.554
## 3 3             32.9  6.61  59.5 10.3  0.468
## 4 4             35.9  6.76  60.7 10.1  0.966
## 5 5             36.0  6.60  59.9  9.91 0.79

RCBD MANOVA: SS Matrices

#### Fitting a CRD MANOVA model in R
turkey_CRD <- 
  manova(
    cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt, 
    data = turkey
  )

turkey_CRD

## Call:
##    manova(cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt, data = turkey)
## 
## Terms:
##                  Salt Residuals
## Cooking_Loss    106.7      90.2
## Ph                0.1       1.1
## Moist            34.9      32.2
## Fat               1.8       7.3
## Hex               2.5       2.0
## Deg. of Freedom     4        20
## 
## Residual standard errors: 2.12 0.237 1.27 0.604 0.314
## Estimated effects may be unbalanced

Adding the effect of day to make a RCBD

turkey_RCBD <- 
  manova(
    cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt + Day, 
    data = turkey
  )


turkey_RCBD

## Call:
##    manova(cbind(Cooking_Loss, Ph, Moist, Fat, Hex) ~ Salt + Day, 
##     data = turkey)
## 
## Terms:
##                  Salt   Day Residuals
## Cooking_Loss    106.7  57.5      32.6
## Ph                0.1   0.3       0.8
## Moist            34.9   7.2      25.0
## Fat               1.8   2.7       4.6
## Hex               2.5   1.4       0.6
## Deg. of Freedom     4     4        16
## 
## Residual standard errors: 1.43 0.222 1.25 0.537 0.191
## Estimated effects may be unbalanced

Notice that the day + residuals from the RCBD = residuals from CRD, while the effect of salt type remains unchanged!

Comparing SS Matrices from the two models

summary(turkey_CRD)$SS$Salt

##              Cooking_Loss     Ph  Moist    Fat    Hex
## Cooking_Loss       106.72 -2.856 -57.93 11.691 13.865
## Ph                  -2.86  0.138   1.40 -0.397 -0.501
## Moist              -57.93  1.397  34.85 -6.565 -6.812
## Fat                 11.69 -0.397  -6.57  1.849  2.018
## Hex                 13.87 -0.501  -6.81  2.018  2.488

summary(turkey_RCBD)$SS$Salt

##              Cooking_Loss     Ph  Moist    Fat    Hex
## Cooking_Loss       106.72 -2.856 -57.93 11.691 13.865
## Ph                  -2.86  0.138   1.40 -0.397 -0.501
## Moist              -57.93  1.397  34.85 -6.565 -6.812
## Fat                 11.69 -0.397  -6.57  1.849  2.018
## Hex                 13.87 -0.501  -6.81  2.018  2.488

summary(turkey_CRD)$SS$Residuals

##              Cooking_Loss      Ph  Moist   Fat     Hex
## Cooking_Loss       90.156 -0.2628 -11.80  0.34  3.6296
## Ph                 -0.263  1.1217   1.64 -0.27 -0.0348
## Moist             -11.802  1.6350  32.19 -4.61  1.9382
## Fat                 0.340 -0.2699  -4.61  7.29  1.2521
## Hex                 3.630 -0.0348   1.94  1.25  1.9677

summary(turkey_RCBD)$SS$Residuals + summary(turkey_RCBD)$SS$Day

##              Cooking_Loss      Ph  Moist   Fat     Hex
## Cooking_Loss       90.156 -0.2628 -11.80  0.34  3.6296
## Ph                 -0.263  1.1217   1.64 -0.27 -0.0348
## Moist             -11.802  1.6350  32.19 -4.61  1.9382
## Fat                 0.340 -0.2699  -4.61  7.29  1.2521
## Hex                 3.630 -0.0348   1.94  1.25  1.9677

By accounting for the effect of Day, we don’t change the performance of the Salts, but we do remove some of the unexplained differences remaining in our error terms!

Manova(
  turkey_RCBD, 
  test = "Wilks", 
  type = 3
)

## 
## Type III MANOVA Tests: Wilks test statistic
##             Df test stat approx F num Df den Df  Pr(>F)    
## (Intercept)  1    0.0008     2967      5   12.0 < 2e-16 ***
## Salt         4    0.0365        3     20   40.7 0.00035 ***
## Day          4    0.0355        4     20   40.7 0.00031 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Manova(
  turkey_CRD, 
  test = "Wilks", 
  type = 3
)

## 
## Type III MANOVA Tests: Wilks test statistic
##             Df test stat approx F num Df den Df Pr(>F)    
## (Intercept)  1    0.0007     4652      5     16 <2e-16 ***
## Salt         4    0.1715        2     20     54  0.033 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Checking the results using Pillai’s statistic

Manova(
  turkey_RCBD, 
  test = "Pillai", 
  type = 3
)

## 
## Type III MANOVA Tests: Pillai test statistic
##             Df test stat approx F num Df den Df Pr(>F)    
## (Intercept)  1     0.999     2967      5     12 <2e-16 ***
## Salt         4     1.433        2     20     60 0.0644 .  
## Day          4     1.900        3     20     60 0.0015 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Manova(
  turkey_CRD, 
  test = "Pillai", 
  type = 3
)

## 
## Type III MANOVA Tests: Pillai test statistic
##             Df test stat approx F num Df den Df Pr(>F)    
## (Intercept)  1     0.999     4652      5     16 <2e-16 ***
## Salt         4     1.088        1     20     76   0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

By accounting for the day, the effect of salt became more evident!

Note that the salt effect isn’t stronger, but we removed some of the noise to make it easier to notice a changed caused by the salt types.

Checking the Assumptions

res_RCBD <- 
  turkey_RCBD |> 
  pluck(residuals) |> 
  data.frame() |> 
  add_column(Salt = turkey$Salt)
res_RCBD

##    Cooking_Loss      Ph   Moist     Fat     Hex    Salt
## 1         -0.58 -0.1256 -0.9224  1.0204 -0.0492 Control
## 2          2.20 -0.0416 -0.4504 -0.1556  0.1448    STP3
## 3          1.14  0.0364  0.3376 -0.1856 -0.1212    STP5
## 4         -1.58  0.1424  0.0096 -0.3376  0.0548  SASMP3
## 5         -1.18 -0.0116  1.0256 -0.3416 -0.0292  SASMP5
## 6         -0.38 -0.0996  2.4496 -0.4216  0.1628 Control
## 7          0.60 -0.0256 -1.4584  0.0924 -0.1032    STP3
## 8         -1.96 -0.0176  0.7596  0.9724  0.2108    STP5
## 9          1.02  0.0584 -1.1484 -0.5296 -0.2132  SASMP3
## 10         0.72  0.0844 -0.6024 -0.1136 -0.0572  SASMP5
## 11         1.66 -0.3296 -0.4844 -0.6816 -0.1712 Control
## 12         0.14  0.0844  0.8976  0.1224  0.0628    STP3
## 13        -1.62  0.0924  0.7656 -0.5576  0.1968    STP5
## 14         0.56  0.0884 -0.1624  0.7604 -0.1472  SASMP3
## 15        -0.74  0.0644 -1.0164  0.3564  0.0588  SASMP5
## 16        -0.32  0.6664  0.5896  0.1604  0.2308 Control
## 17        -1.04 -0.1196 -0.0084  0.1744 -0.0152    STP3
## 18         1.40 -0.1816 -1.9204 -0.2356 -0.1812    STP5
## 19        -0.82 -0.2256  0.4116  0.1424  0.2048  SASMP3
## 20         0.78 -0.1396  0.9276 -0.2416 -0.2392  SASMP5
## 21        -0.38 -0.1116 -1.6324 -0.0776 -0.1732 Control
## 22        -1.90  0.1024  1.0196 -0.2336 -0.0892    STP3
## 23         1.04  0.0704  0.0576  0.0064 -0.1052    STP5
## 24         0.82 -0.0636  0.8896 -0.0356  0.1008  SASMP3
## 25         0.42  0.0024 -0.3344  0.3404  0.2668  SASMP5

#### MVN for the residuals #####
mvn(
  res_RCBD |> select(-Salt), 
  mvnTest="mardia", 
  multivariatePlot = "qq",
  desc = F,
  univariateTest = "SW"
)

## $multivariateNormality
##              Test          Statistic           p value Result
## 1 Mardia Skewness   30.9206335425045  0.66543282841103    YES
## 2 Mardia Kurtosis -0.538068348354928 0.590529863133813    YES
## 3             MVN               <NA>              <NA>    YES
## 
## $univariateNormality
##           Test     Variable Statistic   p value Normality
## 1 Shapiro-Wilk Cooking_Loss     0.965    0.5120    YES   
## 2 Shapiro-Wilk      Ph          0.829    0.0007    NO    
## 3 Shapiro-Wilk    Moist         0.970    0.6507    YES   
## 4 Shapiro-Wilk     Fat          0.930    0.0853    YES   
## 5 Shapiro-Wilk     Hex          0.939    0.1407    YES

Small sample, but we still reject that Ph is univariate Normal, so there is evidence that the complete data isn’t MVN

Checking the Equal variance assumption

Can’t check equal covariance assumption since b = p

Using Levene’s test to check for equal variance across salts with the day effect removed: using the residuals from just the day model.

res_RCBD |> 
  pivot_longer(
    cols = Cooking_Loss:Hex, 
    names_to = "variable", 
    values_to = "residual"
  ) |>
  group_by(variable) |> 
  levene_test(residual ~ Salt) |>
  data.frame() |> 
  arrange(p)

##       variable df1 df2 statistic     p
## 1          Fat   4  20     0.657 0.629
## 2           Ph   4  20     0.646 0.636
## 3 Cooking_Loss   4  20     0.501 0.735
## 4        Moist   4  20     0.409 0.800
## 5          Hex   4  20     0.273 0.892

Although we can’t compare the covariance matrices across salt types, the individual test for homogeneity didn’t show a difference in variances

After accounting for the differences across Days, we have the same issue that we had for our Completely Randomized Design analysis.

Post Hoc Analysis: Finding the Differences

Finding which variables are different:

We can use anova_test() in rstatix to quickly apply an ANOVA test to each variable.

We need to put in “long” format first.

We can turn the dataframe into a long format using pivot_longer!

Just need to specify cols = response variable columns and what we want to rename the actual values column and variable identifier column to be

turkey |> 
  pivot_longer(
    cols = Cooking_Loss:Hex, 
    names_to = "variable", 
    values_to = "measure"
  ) |>
  group_by(variable) |> 
  anova_test(measure ~ Day + Salt) |>
  data.frame() |> 
  filter(Effect == "Salt") |>
  arrange(p)

##       variable Effect DFn DFd      F        p p..05   ges
## 1          Hex   Salt   4  16 17.003 1.29e-05     * 0.810
## 2 Cooking_Loss   Salt   4  16 13.079 6.45e-05     * 0.766
## 3        Moist   Salt   4  16  5.583 5.00e-03     * 0.583
## 4          Fat   Salt   4  16  1.601 2.22e-01       0.286
## 5           Ph   Salt   4  16  0.697 6.05e-01       0.148

The p-values for Hex, Cooking_Loss, and Moisture are statistically significant

Comparing Significant Variables Across Groups

We can find which groups are significantly different across the significant variables using tukey_hsd() in the rstatix package

turkey_tukey <- 
  turkey |> 
  dplyr::select(-Ph, - Fat) |>
  pivot_longer(
    cols = Cooking_Loss:Hex, 
    names_to = "variable", 
    values_to = "measure"
  ) |>
  group_by(variable) |> 
  tukey_hsd(
    measure ~ Day + Salt, 
    which = "Salt"
  ) |> 
  filter(p.adj < 0.01) |>
  arrange(variable, p.adj)


turkey_tukey

## # A tibble: 7 × 10
##   variable    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##   <chr>       <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Cooking_Lo… Salt  Contr… STP5            0   -6.42    -9.19     -3.65  2.16e-5
## 2 Cooking_Lo… Salt  STP5   SASMP3          0    4.22     1.45      6.99  2.05e-3
## 3 Cooking_Lo… Salt  STP3   STP5            0   -3.64    -6.41     -0.873 7.42e-3
## 4 Hex         Salt  STP5   SASMP3          0    0.874    0.503     1.24  1.76e-5
## 5 Hex         Salt  Contr… STP5            0   -0.748   -1.12     -0.377 1.12e-4
## 6 Hex         Salt  SASMP3 SASMP5          0   -0.586   -0.957    -0.215 1.45e-3
## 7 Moist       Salt  Contr… STP5            0    3.66     1.24      6.08  2.21e-3
## # ℹ 1 more variable: p.adj.signif <chr>