[1] "C:/Users/anune/Downloads"
Last Homework
Repeated Measures
Setup Code
#================================================================#
# Setup Options
#================================================================#
# good habit to run is the sessionInfo() function to print all relevant info
sessionInfo()R version 4.1.3 (2022-03-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] htmlwidgets_1.6.2 compiler_4.1.3 fastmap_1.1.1 cli_3.6.1
[5] tools_4.1.3 htmltools_0.5.5 rstudioapi_0.15.0 yaml_2.3.7
[9] rmarkdown_2.25 knitr_1.44 jsonlite_1.8.4 xfun_0.39
[13] digest_0.6.31 rlang_1.1.0 evaluate_0.22
# remove all objects if restarting script
rm(list=ls())
knitr::opts_knit$set(root.dir = "C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB")
getwd()[1] "C:/Users/anune/Downloads"
# list all objects in your environment
ls()character(0)
# set tibble width for printing - print all columns
options(tibble.width = Inf)
# remove scientific notation (by default will print)
options(scipen=999)
#================================================================#
# Packages
#================================================================#
# Here we must load the packages for the current environment to make them
# accessible.
# load libraries
library(tidyverse)
library(emmeans)
library(car)
library(DT)
library(lme4)
library(lmerTest)Final Homework
Read Data
Read weekly milk production in grams from Ewes of 2crosses subject to 3 diets
rm(list = ls())
library(readr)
library(stringr)
setwd("C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB")
sheep <- readxl::read_xlsx("C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB\\sheep_milk.xlsx")
head(sheep)# A tibble: 6 x 10
cross Diet w_0 w_1 w_2 w_3 w_4 w_5 w_6 w_7
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1260 1640 1080 1000 950 1000 900 800
2 1 1 1900 2570 1100 1520 1200 1300 1350 1250
3 1 1 1380 2760 1550 980 1380 1000 1300 1050
4 1 1 3910 3190 1050 1180 980 850 1100 950
5 1 1 2900 2440 1280 1540 740 800 600 500
6 1 2 2210 2330 1700 1180 890 850 750 600
datatable(sheep)Transform data (10p)
Add a column called animal that contains the row number (2p)
Transform data to long format, including the following columns: cross, Diet, animal, week, milk (2p)
Add a column called indiv, that contains the animal, but as a factor. (2p) Add a column called period, that contains the week, but as a factor. (2p) Change the cross and Diet to factors too (2p)
library(tidyr)
sheep$animal <- seq_len(nrow(sheep))
head(sheep)# A tibble: 6 x 11
cross Diet w_0 w_1 w_2 w_3 w_4 w_5 w_6 w_7 animal
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 1 1 1260 1640 1080 1000 950 1000 900 800 1
2 1 1 1900 2570 1100 1520 1200 1300 1350 1250 2
3 1 1 1380 2760 1550 980 1380 1000 1300 1050 3
4 1 1 3910 3190 1050 1180 980 850 1100 950 4
5 1 1 2900 2440 1280 1540 740 800 600 500 5
6 1 2 2210 2330 1700 1180 890 850 750 600 6
colnames(sheep) [1] "cross" "Diet" "w_0" "w_1" "w_2" "w_3" "w_4" "w_5"
[9] "w_6" "w_7" "animal"
sheep_long <- tidyr::pivot_longer(sheep, cols = starts_with("w_"),
names_to = "week", values_to = "milk")
sheep_long$indiv <- as.factor(sheep_long$animal)
sheep_long$period <- as.factor(sheep_long$week)
sheep_long$cross <- as.factor(sheep_long$cross)
sheep_long$Diet <- as.factor(sheep_long$Diet)
num_animals <- length(unique(sheep_long$animal))
print(num_animals)[1] 30
help("pivot_longer")starting httpd help server ... done
head(sheep_long)# A tibble: 6 x 7
cross Diet animal week milk indiv period
<fct> <fct> <int> <chr> <dbl> <fct> <fct>
1 1 1 1 w_0 1260 1 w_0
2 1 1 1 w_1 1640 1 w_1
3 1 1 1 w_2 1080 1 w_2
4 1 1 1 w_3 1000 1 w_3
5 1 1 1 w_4 950 1 w_4
6 1 1 1 w_5 1000 1 w_5
datatable(sheep_long)Represent the data (10 p)
Build a graphic with 3 panels, one panel per cross and add line plots of milk production as a function of week, but represent the three diets in different colors
sheep_long$week <- as.numeric(gsub("w_", "", sheep_long$week))
bar_colors <- c("#66c2a5", "#fc8d62", "#8da0cb")
line_colors <- c("#e78ac3", "#a6d854", "#ffd92f")
ggplot(sheep_long, aes(x = week, y = milk, fill = factor(Diet))) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8, alpha = 0.7, aes(color = factor(Diet))) +
geom_line(aes(group = interaction(cross, Diet), color = factor(Diet)), position = position_dodge(width = 0.8), size = 0.8) +
facet_wrap(~cross, scales = "free_y", ncol = 3) +
labs(title = "Milk Production as a Function of Week by Cross",
x = "Week",
y = "Milk Production (grams)",
fill = "Diet",
color = "Diet") +
scale_fill_manual(values = bar_colors) +
scale_color_manual(values = line_colors)Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
i Please use `linewidth` instead.
Full model
Fit a model that contains the three way interaction: of period, diet and cross for the fixed effects with an unstructured variance covariance between periods within individual. (10p)
Show the resulting variance covariance matrix (10p)
sheep_long$period <- as.numeric(gsub("w_", "", sheep_long$week))
library(nlme)
Attaching package: 'nlme'
The following object is masked from 'package:lme4':
lmList
The following object is masked from 'package:dplyr':
collapse
model_1 <- lme(milk ~ period * Diet * cross,
random = ~1 | indiv/period,
correlation = corSymm(form = ~1 | indiv),
data = sheep_long)Warning in lme.formula(milk ~ period * Diet * cross, random = ~1 |
indiv/period, : cannot use smaller level of grouping for 'correlation' than for
'random'. Replacing the former with the latter.
var_cov_matrix <- VarCorr(model_1)
print(var_cov_matrix) Variance StdDev
indiv = pdLogChol(1)
(Intercept) 11768.17 108.4812
period = pdLogChol(1)
(Intercept) 162998.82 403.7311
Residual 21444.11 146.4381
two reduced covariance models
Fit two more models (10p each) with reduced covariances, any model of your choice, but explain your choice. In both cases, show the resulting variance covariance matrix.
sheep_long$period <- factor(sheep_long$period, levels = 0:7)
model_2 <- lme(milk ~ period * Diet * cross,
random = ~1 | indiv,
data = sheep_long)
var_cov_matrix_2 <- VarCorr(model_2)
print(var_cov_matrix_2)indiv = pdLogChol(1)
Variance StdDev
(Intercept) 21181.66 145.5392
Residual 109135.25 330.3562
model_3 <- lme(milk ~ period * Diet * cross,
random = ~1 | indiv/period,
data = sheep_long,
correlation = corAR1(form = ~1 | indiv))Warning in lme.formula(milk ~ period * Diet * cross, random = ~1 |
indiv/period, : cannot use smaller level of grouping for 'correlation' than for
'random'. Replacing the former with the latter.
var_cov_matrix3 <- VarCorr(model_3)
print(var_cov_matrix3) Variance StdDev
indiv = pdLogChol(1)
(Intercept) 21181.61 145.5390
period = pdLogChol(1)
(Intercept) 94968.06 308.1689
Residual 14167.21 119.0261
###Explanation:
Model 2: This model includes a random intercept for each individual (random = ~1 | indiv), but it does not include any correlation between repeated measures within the same individual or across different periods.
Choice Rationale: This model assumes independence of observations within individuals and across periods, making it a simpler model compared to the one with a correlation structure.
Model 3: Now this model includes a random intercept for each individual and allows for a correlation structure among repeated measures within each individual (random = ~1 | indiv/period). The correlation structure is an AR(1) structure (corAR1(form = ~1 | indiv)), which models autocorrelation between observations within the same individual.
Choice Rationale: This model accounts for potential autocorrelation between consecutive periods within the same individual, providing a more complex but potentially more realistic representation of the data.
Both models offer different assumptions about the correlation structure within the data, and the choice between them depends on the underlying patterns and characteristics of your data. Model 3 may be preferred if there is reason to suspect autocorrelation within individuals across periods.
Compare the AIC of the three fitted models and select best model. (10 p)
aic_values <- c(AIC(model_1), AIC(model_2), AIC(model_3))
print(aic_values)[1] 3505.671 2971.883 2975.883
best_model_index <- which.min(aic_values)
best_model <- list(model_1, model_2, model_3)[best_model_index]
print(best_model)[[1]]
Linear mixed-effects model fit by REML
Data: sheep_long
Log-restricted-likelihood: -1435.941
Fixed: milk ~ period * Diet * cross
(Intercept) period1 period2
2270 250 -1058
period3 period4 period5
-1026 -1220 -1280
period6 period7 Diet2
-1220 -1360 -338
Diet3 cross2 period1:Diet2
-1130 920 -508
period2:Diet2 period3:Diet2 period4:Diet2
404 28 -84
period5:Diet2 period6:Diet2 period7:Diet2
-112 -130 -142
period1:Diet3 period2:Diet3 period3:Diet3
-462 576 290
period4:Diet3 period5:Diet3 period6:Diet3
384 354 322
period7:Diet3 period1:cross2 period2:cross2
380 -174 -594
period3:cross2 period4:cross2 period5:cross2
-816 -858 -810
period6:cross2 period7:cross2 Diet2:cross2
-970 -754 -786
Diet3:cross2 period1:Diet2:cross2 period2:Diet2:cross2
-50 -14 380
period3:Diet2:cross2 period4:Diet2:cross2 period5:Diet2:cross2
686 812 764
period6:Diet2:cross2 period7:Diet2:cross2 period1:Diet3:cross2
808 870 -270
period2:Diet3:cross2 period3:Diet3:cross2 period4:Diet3:cross2
94 310 284
period5:Diet3:cross2 period6:Diet3:cross2 period7:Diet3:cross2
326 518 328
Random effects:
Formula: ~1 | indiv
(Intercept) Residual
StdDev: 145.5392 330.3562
Number of Observations: 240
Number of Groups: 30
##Lower AIC values are better, Model 2 has the lowest AIC and is considered the best model among the three you compared.
ANOVA 10p
using the best model Perform an ANOVA to determine if there is a significant 3-way interaction: perioddietcross hint: use emmeans and join_tests, and specify: periodDietcross
library(lmerTest)
colnames(sheep_long)[1] "cross" "Diet" "animal" "week" "milk" "indiv" "period"
best_model_lmerTest <- lmerTest::lmer(milk ~ period * Diet * cross + (1 | indiv), data = sheep_long)
emmeans_lmerTest <- emmeans::emmeans(best_model_lmerTest, ~ period * Diet * cross)
joint_test_result_lmerTest <- emmeans::joint_tests(emmeans_lmerTest, hypothesis = "period * Diet * cross")
print(joint_test_result_lmerTest) model term df1 df2 F.ratio p.value
period 7 168 92.503 <.0001
Diet 2 24 50.195 <.0001
cross 1 24 15.124 0.0007
period:Diet 14 168 4.711 <.0001
period:cross 7 168 2.065 0.0500
Diet:cross 2 24 2.880 0.0757
period:Diet:cross 14 168 0.867 0.5954
mean comparison 20p
for each period, compare the three diets pairwise and write conclusions (10 p)
best_model[[1]]
Linear mixed-effects model fit by REML
Data: sheep_long
Log-restricted-likelihood: -1435.941
Fixed: milk ~ period * Diet * cross
(Intercept) period1 period2
2270 250 -1058
period3 period4 period5
-1026 -1220 -1280
period6 period7 Diet2
-1220 -1360 -338
Diet3 cross2 period1:Diet2
-1130 920 -508
period2:Diet2 period3:Diet2 period4:Diet2
404 28 -84
period5:Diet2 period6:Diet2 period7:Diet2
-112 -130 -142
period1:Diet3 period2:Diet3 period3:Diet3
-462 576 290
period4:Diet3 period5:Diet3 period6:Diet3
384 354 322
period7:Diet3 period1:cross2 period2:cross2
380 -174 -594
period3:cross2 period4:cross2 period5:cross2
-816 -858 -810
period6:cross2 period7:cross2 Diet2:cross2
-970 -754 -786
Diet3:cross2 period1:Diet2:cross2 period2:Diet2:cross2
-50 -14 380
period3:Diet2:cross2 period4:Diet2:cross2 period5:Diet2:cross2
686 812 764
period6:Diet2:cross2 period7:Diet2:cross2 period1:Diet3:cross2
808 870 -270
period2:Diet3:cross2 period3:Diet3:cross2 period4:Diet3:cross2
94 310 284
period5:Diet3:cross2 period6:Diet3:cross2 period7:Diet3:cross2
326 518 328
Random effects:
Formula: ~1 | indiv
(Intercept) Residual
StdDev: 145.5392 330.3562
Number of Observations: 240
Number of Groups: 30
sheep_long# A tibble: 240 x 7
cross Diet animal week milk indiv period
<fct> <fct> <int> <dbl> <dbl> <fct> <fct>
1 1 1 1 0 1260 1 0
2 1 1 1 1 1640 1 1
3 1 1 1 2 1080 1 2
4 1 1 1 3 1000 1 3
5 1 1 1 4 950 1 4
6 1 1 1 5 1000 1 5
7 1 1 1 6 900 1 6
8 1 1 1 7 800 1 7
9 1 1 2 0 1900 2 0
10 1 1 2 1 2570 2 1
# i 230 more rows
sheep_long$period <- factor(sheep_long$period, levels = 0:7)
model_2 <- lme(milk ~ period * Diet * cross,
random = ~1 | indiv,
data = sheep_long)
combined_emmeans_results <- emmeans(model_2, ~Diet | period)NOTE: Results may be misleading due to involvement in interactions
pairwise_comparisons <- pairs(combined_emmeans_results)
summary(pairwise_comparisons)period = 0:
contrast estimate SE df t.ratio p.value
1 - 2 731 161 24 4.528 0.0004
1 - 3 1155 161 24 7.154 <.0001
2 - 3 424 161 24 2.626 0.0380
period = 1:
contrast estimate SE df t.ratio p.value
1 - 2 1246 161 24 7.718 <.0001
1 - 3 1752 161 24 10.852 <.0001
2 - 3 506 161 24 3.134 0.0121
period = 2:
contrast estimate SE df t.ratio p.value
1 - 2 137 161 24 0.849 0.6771
1 - 3 532 161 24 3.295 0.0083
2 - 3 395 161 24 2.447 0.0556
period = 3:
contrast estimate SE df t.ratio p.value
1 - 2 360 161 24 2.230 0.0863
1 - 3 710 161 24 4.398 0.0005
2 - 3 350 161 24 2.168 0.0975
period = 4:
contrast estimate SE df t.ratio p.value
1 - 2 409 161 24 2.533 0.0463
1 - 3 629 161 24 3.896 0.0019
2 - 3 220 161 24 1.363 0.3757
period = 5:
contrast estimate SE df t.ratio p.value
1 - 2 461 161 24 2.856 0.0229
1 - 3 638 161 24 3.952 0.0017
2 - 3 177 161 24 1.096 0.5255
period = 6:
contrast estimate SE df t.ratio p.value
1 - 2 457 161 24 2.831 0.0242
1 - 3 574 161 24 3.555 0.0044
2 - 3 117 161 24 0.725 0.7514
period = 7:
contrast estimate SE df t.ratio p.value
1 - 2 438 161 24 2.713 0.0314
1 - 3 611 161 24 3.785 0.0025
2 - 3 173 161 24 1.072 0.5404
Results are averaged over the levels of: cross
Degrees-of-freedom method: containment
P value adjustment: tukey method for comparing a family of 3 estimates
###The estimated marginal means (emmeans) for each diet were calculated, representing the average response for each diet, adjusted for the potential influence of interactions. Pairwise Comparisons Across Diets:
Pairwise comparisons between diets were conducted across the 7-week period, revealing statistically significant differences. Diet 1 vs. Diet 2: Significant difference. Diet 1 vs. Diet 3: Significant difference. Diet 2 vs. Diet 3: Significant difference.
The degrees-of-freedom method used is “containment,” and the analysis was conducted at a 95% confidence level.
P-values from the pairwise comparisons were adjusted using Tukey’s method to control for multiple testing.
for each period, compare the two crosses and write conclusions (10 p)
##Across all periods, there are consistent differences in the variable of interest between Cross 1 and Cross 2. Statistically significant differences between the two crosses were observed in Periods 0, 1, and 7. In Periods 0 and 1, Cross 1 showed a significantly lower value compared to Cross 2. In Period 7, Cross 1 showed a significantly lower value compared to Cross 2. Non-Significant Differences:
In Periods 2, 3, 4, 5, and 6, there were no statistically significant differences between Cross 1 and Cross 2.
significance of differences vary across periods, suggesting potential period-specific dynamics influencing the variable of interest.
combined_emmeans_cross <- emmeans(model_2, ~cross | period)NOTE: Results may be misleading due to involvement in interactions
pairwise_comparisons_cross <- pairs(combined_emmeans_cross)
summary(pairwise_comparisons_cross)period = 0:
contrast estimate SE df t.ratio p.value
1 - 2 -641 132 24 -4.865 0.0001
period = 1:
contrast estimate SE df t.ratio p.value
1 - 2 -373 132 24 -2.827 0.0093
period = 2:
contrast estimate SE df t.ratio p.value
1 - 2 -205 132 24 -1.558 0.1324
period = 3:
contrast estimate SE df t.ratio p.value
1 - 2 -157 132 24 -1.194 0.2443
period = 4:
contrast estimate SE df t.ratio p.value
1 - 2 -149 132 24 -1.128 0.2705
period = 5:
contrast estimate SE df t.ratio p.value
1 - 2 -195 132 24 -1.477 0.1527
period = 6:
contrast estimate SE df t.ratio p.value
1 - 2 -113 132 24 -0.860 0.3984
period = 7:
contrast estimate SE df t.ratio p.value
1 - 2 -287 132 24 -2.175 0.0397
Results are averaged over the levels of: Diet
Degrees-of-freedom method: containment