Case Study: Mouse Diets

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lmPerm)
## Warning: package 'lmPerm' was built under R version 4.0.5
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.5
mice <- Sleuth3::case0501
attach(mice)
names(mice)
## [1] "Lifetime" "Diet"

~Data Introduction~

"Researchers have studied the effect of caloric restriction on lifespan. In one study, female mice were randomly assigned to the following treatment groups:

NP = unlimited amounts of a nonpurified, standard diet for lab mice N/N85 = normal feeding before and after weaning; ration was controlled at 85 kcal/week after weaning (control group) N/R50 = normal feeding before weaning, reduced-calorie diet of 50 kcal/week after weaning R/R50 = reduced-calorie diet of 50 kcal/week before and after weaning N/R50 lopro = normal feeding before weaning, reduced-calorie diet of 50 cal/week after weaning, and dietary protein reduced with advancing ag N/R40 = normal feeding before weaning, severely reduced-calorie diet of 40 kcal/week after weaning"

ggplot(data=mice, aes(x=Diet, y=Lifetime, fill=Diet)) +
  geom_boxplot(outlier.color="red", outlier.size=2) +
  labs(title="Lifetime of Mice Baced on Diet") +
  theme_classic()

summary(
  ANOVA.model <- aov(Lifetime ~ Diet, data=mice)
)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Diet          5  12734  2546.8    57.1 <2e-16 ***
## Residuals   343  15297    44.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the null hypothesis that the means are equal at the 0.05 level of significance compared to the p-value of <2e-16 we can conclude that the means are not equal.There is a statistically significant difference in at least one of the means. Based on the boxplots the first means we should test would be N/N85 and NP.

ScheffeTest(ANOVA.model)
## 
##   Posthoc multiple comparisons of means: Scheffe Test 
##     95% family-wise confidence level
## 
## $Diet
##                    diff     lwr.ci      upr.ci    pval    
## N/R40-N/N85  12.4254386   8.291344  16.5595333 < 2e-16 ***
## N/R50-N/N85   9.6059550   5.630939  13.5809714 1.1e-11 ***
## NP-N/N85     -5.2891873  -9.643484  -0.9348907  0.0063 ** 
## R/R50-N/N85  10.1944862   5.989076  14.3998960 9.7e-12 ***
## lopro-N/N85   6.9944862   2.789076  11.1998960 1.6e-05 ***
## N/R50-N/R40  -2.8194836  -6.738990   1.1000229  0.3289    
## NP-N/R40    -17.7146259 -22.018307 -13.4109443 < 2e-16 ***
## R/R50-N/R40  -2.2309524  -6.383933   1.9220282  0.6644    
## lopro-N/R40  -5.4309524  -9.583933  -1.2779718  0.0022 ** 
## NP-N/R50    -14.8951423 -19.046249 -10.7440351 < 2e-16 ***
## R/R50-N/R50   0.5885312  -3.406123   4.5831856  0.9986    
## lopro-N/R50  -2.6114688  -6.606123   1.3831856  0.4440    
## R/R50-NP     15.4836735  11.111442  19.8559049 < 2e-16 ***
## lopro-NP     12.2836735   7.911442  16.6559049 1.4e-15 ***
## lopro-R/R50  -3.2000000  -7.423977   1.0239768  0.2695    
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PostHocTest(ANOVA.model, method="scheffe", conf.level=NA, ordered=FALSE)
## 
##   Posthoc multiple comparisons of means: Scheffe Test 
## 
## $Diet
##       N/N85   N/R40   N/R50   NP      R/R50 
## N/R40 < 2e-16 -       -       -       -     
## N/R50 1.1e-11 0.3289  -       -       -     
## NP    0.0063  < 2e-16 < 2e-16 -       -     
## R/R50 9.7e-12 0.6644  0.9986  < 2e-16 -     
## lopro 1.6e-05 0.0022  0.4440  1.4e-15 0.2695

Based on the results of Scheffe’s Test it does appear that NP and N/N85 are different compared to the other diets.

# model checking
GraphNormality <- function(model) {
  residual.data <- data.frame(e=model$residuals)
  H             <- shapiro.test(residual.data$e)
  ggplot(residual.data, aes(sample=e)) + 
    stat_qq() +
    geom_abline(color="blue", intercept=mean(residual.data$e), slope=sd(residual.data$e)) +
    labs(title="Normally Distributed Residuals?", 
         subtitle = paste("Shapiro-Wilks test p-value =", signif(H$p.value,5)) ) +
    theme_classic()
}

GraphHomogeneity <- function(response, predictor, dataset=NULL) {
  H <- bartlett.test(response~predictor)    
  ggplot(data=dataset, aes(x=predictor, y=response, fill=predictor)) +
    geom_boxplot(outlier.color = "red", outlier.size=3) +
    geom_jitter(width=0.2) +
    labs( title="Homogenous Variances?",
         subtitle=paste("Bartlett's test p-value =", signif(H$p.value,5)) ) +    
    theme_classic()
}
GraphNormality(ANOVA.model)

GraphHomogeneity(mice$Lifetime, mice$Diet, dataset=mice)

Based on the graph for normality, it does not appear that our data is normally distributed. Our data does barely appear to have homogeneous variances at a 0.05 significance level, but since the test for normality does not hold up, our data does not meet the ANOVA assumptions.

summary(
  perm.model <- aovp(Lifetime ~ Diet, data=mice)
)
## [1] "Settings:  unique SS "
## Component 1 :
##              Df R Sum Sq R Mean Sq Iter  Pr(Prob)    
## Diet          5    12734    2546.8 5000 < 2.2e-16 ***
## Residuals   343    15297      44.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
class(perm.model)
## [1] "aovp" "aov"  "lmp"  "lm"
PostHocTest(perm.model, method="scheffe", conf.level=NA, ordered=FALSE)
## 
##   Posthoc multiple comparisons of means: Scheffe Test 
## 
## $Diet
##       N/N85   N/R40   N/R50   NP      R/R50 
## N/R40 < 2e-16 -       -       -       -     
## N/R50 1.1e-11 0.3289  -       -       -     
## NP    0.0063  < 2e-16 < 2e-16 -       -     
## R/R50 9.7e-12 0.6644  0.9986  < 2e-16 -     
## lopro 1.6e-05 0.0022  0.4440  1.4e-15 0.2695

The comparisons are in the same order as our previous test.

( kw.test <- kruskal.test(Lifetime ~ Diet, data=mice) )
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Lifetime by Diet
## Kruskal-Wallis chi-squared = 159.01, df = 5, p-value < 2.2e-16
class(kw.test)
## [1] "htest"

Using a bonferoni adjustment we come up with an adjusted alpha of 0.05/15 = 0.0033. With that we can now do pairwise attach(mice) wilcox.test(Lifetime[Diet==“N/R40”], Lifetime[Diet==“NP”], alternative=“two.sided”, conf.level=0.9967)

attach(mice)
wilcox.test(Lifetime[Diet=="N/R40"], Lifetime[Diet=="NP"], alternative="two.sided", conf.level=0.9967)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Lifetime[Diet == "N/R40"] and Lifetime[Diet == "NP"]
## W = 2854, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
attach(mice)
wilcox.test(Lifetime[Diet=="N/R50"], Lifetime[Diet=="NP"], alternative="two.sided", conf.level=0.9967)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Lifetime[Diet == "N/R50"] and Lifetime[Diet == "NP"]
## W = 3211.5, p-value = 3.939e-15
## alternative hypothesis: true location shift is not equal to 0
attach(mice)
wilcox.test(Lifetime[Diet=="N/R50"], Lifetime[Diet=="N/R40"], alternative="two.sided", conf.level=0.9967)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Lifetime[Diet == "N/R50"] and Lifetime[Diet == "N/R40"]
## W = 1682.5, p-value = 0.03891
## alternative hypothesis: true location shift is not equal to 0

Just running a few pairwise comparisons shows us that our initial conclusion that N/N85 is a less effective diet than the others. We could run additional pairwise tests to confirm the same about NP.