Case Study: Mouse Diets
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lmPerm)
## Warning: package 'lmPerm' was built under R version 4.0.5
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.5
mice <- Sleuth3::case0501
attach(mice)
names(mice)
## [1] "Lifetime" "Diet"
~Data Introduction~
"Researchers have studied the effect of caloric restriction on lifespan. In one study, female mice were randomly assigned to the following treatment groups:
NP = unlimited amounts of a nonpurified, standard diet for lab mice N/N85 = normal feeding before and after weaning; ration was controlled at 85 kcal/week after weaning (control group) N/R50 = normal feeding before weaning, reduced-calorie diet of 50 kcal/week after weaning R/R50 = reduced-calorie diet of 50 kcal/week before and after weaning N/R50 lopro = normal feeding before weaning, reduced-calorie diet of 50 cal/week after weaning, and dietary protein reduced with advancing ag N/R40 = normal feeding before weaning, severely reduced-calorie diet of 40 kcal/week after weaning"
ggplot(data=mice, aes(x=Diet, y=Lifetime, fill=Diet)) +
geom_boxplot(outlier.color="red", outlier.size=2) +
labs(title="Lifetime of Mice Baced on Diet") +
theme_classic()
summary(
ANOVA.model <- aov(Lifetime ~ Diet, data=mice)
)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 5 12734 2546.8 57.1 <2e-16 ***
## Residuals 343 15297 44.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing the null hypothesis that the means are equal at the 0.05 level of significance compared to the p-value of <2e-16 we can conclude that the means are not equal.There is a statistically significant difference in at least one of the means. Based on the boxplots the first means we should test would be N/N85 and NP.
ScheffeTest(ANOVA.model)
##
## Posthoc multiple comparisons of means: Scheffe Test
## 95% family-wise confidence level
##
## $Diet
## diff lwr.ci upr.ci pval
## N/R40-N/N85 12.4254386 8.291344 16.5595333 < 2e-16 ***
## N/R50-N/N85 9.6059550 5.630939 13.5809714 1.1e-11 ***
## NP-N/N85 -5.2891873 -9.643484 -0.9348907 0.0063 **
## R/R50-N/N85 10.1944862 5.989076 14.3998960 9.7e-12 ***
## lopro-N/N85 6.9944862 2.789076 11.1998960 1.6e-05 ***
## N/R50-N/R40 -2.8194836 -6.738990 1.1000229 0.3289
## NP-N/R40 -17.7146259 -22.018307 -13.4109443 < 2e-16 ***
## R/R50-N/R40 -2.2309524 -6.383933 1.9220282 0.6644
## lopro-N/R40 -5.4309524 -9.583933 -1.2779718 0.0022 **
## NP-N/R50 -14.8951423 -19.046249 -10.7440351 < 2e-16 ***
## R/R50-N/R50 0.5885312 -3.406123 4.5831856 0.9986
## lopro-N/R50 -2.6114688 -6.606123 1.3831856 0.4440
## R/R50-NP 15.4836735 11.111442 19.8559049 < 2e-16 ***
## lopro-NP 12.2836735 7.911442 16.6559049 1.4e-15 ***
## lopro-R/R50 -3.2000000 -7.423977 1.0239768 0.2695
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PostHocTest(ANOVA.model, method="scheffe", conf.level=NA, ordered=FALSE)
##
## Posthoc multiple comparisons of means: Scheffe Test
##
## $Diet
## N/N85 N/R40 N/R50 NP R/R50
## N/R40 < 2e-16 - - - -
## N/R50 1.1e-11 0.3289 - - -
## NP 0.0063 < 2e-16 < 2e-16 - -
## R/R50 9.7e-12 0.6644 0.9986 < 2e-16 -
## lopro 1.6e-05 0.0022 0.4440 1.4e-15 0.2695
Based on the results of Scheffe’s Test it does appear that NP and N/N85 are different compared to the other diets.
# model checking
GraphNormality <- function(model) {
residual.data <- data.frame(e=model$residuals)
H <- shapiro.test(residual.data$e)
ggplot(residual.data, aes(sample=e)) +
stat_qq() +
geom_abline(color="blue", intercept=mean(residual.data$e), slope=sd(residual.data$e)) +
labs(title="Normally Distributed Residuals?",
subtitle = paste("Shapiro-Wilks test p-value =", signif(H$p.value,5)) ) +
theme_classic()
}
GraphHomogeneity <- function(response, predictor, dataset=NULL) {
H <- bartlett.test(response~predictor)
ggplot(data=dataset, aes(x=predictor, y=response, fill=predictor)) +
geom_boxplot(outlier.color = "red", outlier.size=3) +
geom_jitter(width=0.2) +
labs( title="Homogenous Variances?",
subtitle=paste("Bartlett's test p-value =", signif(H$p.value,5)) ) +
theme_classic()
}
GraphNormality(ANOVA.model)
GraphHomogeneity(mice$Lifetime, mice$Diet, dataset=mice)
Based on the graph for normality, it does not appear that our data is normally distributed. Our data does barely appear to have homogeneous variances at a 0.05 significance level, but since the test for normality does not hold up, our data does not meet the ANOVA assumptions.
summary(
perm.model <- aovp(Lifetime ~ Diet, data=mice)
)
## [1] "Settings: unique SS "
## Component 1 :
## Df R Sum Sq R Mean Sq Iter Pr(Prob)
## Diet 5 12734 2546.8 5000 < 2.2e-16 ***
## Residuals 343 15297 44.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
class(perm.model)
## [1] "aovp" "aov" "lmp" "lm"
PostHocTest(perm.model, method="scheffe", conf.level=NA, ordered=FALSE)
##
## Posthoc multiple comparisons of means: Scheffe Test
##
## $Diet
## N/N85 N/R40 N/R50 NP R/R50
## N/R40 < 2e-16 - - - -
## N/R50 1.1e-11 0.3289 - - -
## NP 0.0063 < 2e-16 < 2e-16 - -
## R/R50 9.7e-12 0.6644 0.9986 < 2e-16 -
## lopro 1.6e-05 0.0022 0.4440 1.4e-15 0.2695
The comparisons are in the same order as our previous test.
( kw.test <- kruskal.test(Lifetime ~ Diet, data=mice) )
##
## Kruskal-Wallis rank sum test
##
## data: Lifetime by Diet
## Kruskal-Wallis chi-squared = 159.01, df = 5, p-value < 2.2e-16
class(kw.test)
## [1] "htest"
Using a bonferoni adjustment we come up with an adjusted alpha of 0.05/15 = 0.0033. With that we can now do pairwise attach(mice) wilcox.test(Lifetime[Diet==“N/R40”], Lifetime[Diet==“NP”], alternative=“two.sided”, conf.level=0.9967)
attach(mice)
wilcox.test(Lifetime[Diet=="N/R40"], Lifetime[Diet=="NP"], alternative="two.sided", conf.level=0.9967)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Lifetime[Diet == "N/R40"] and Lifetime[Diet == "NP"]
## W = 2854, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
attach(mice)
wilcox.test(Lifetime[Diet=="N/R50"], Lifetime[Diet=="NP"], alternative="two.sided", conf.level=0.9967)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Lifetime[Diet == "N/R50"] and Lifetime[Diet == "NP"]
## W = 3211.5, p-value = 3.939e-15
## alternative hypothesis: true location shift is not equal to 0
attach(mice)
wilcox.test(Lifetime[Diet=="N/R50"], Lifetime[Diet=="N/R40"], alternative="two.sided", conf.level=0.9967)
##
## Wilcoxon rank sum test with continuity correction
##
## data: Lifetime[Diet == "N/R50"] and Lifetime[Diet == "N/R40"]
## W = 1682.5, p-value = 0.03891
## alternative hypothesis: true location shift is not equal to 0
Just running a few pairwise comparisons shows us that our initial conclusion that N/N85 is a less effective diet than the others. We could run additional pairwise tests to confirm the same about NP.