Last Homework

Repeated Measures

Author

Andrea Nunez

Published

Invalid Date

[1] "C:/Users/anune/Downloads"

Setup Code

#================================================================#
# Setup Options
#================================================================#

# good habit to run is the sessionInfo() function to print all relevant info
sessionInfo()

R version 4.1.3 (2022-03-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] htmlwidgets_1.6.2 compiler_4.1.3    fastmap_1.1.1     cli_3.6.1        
 [5] tools_4.1.3       htmltools_0.5.5   rstudioapi_0.15.0 yaml_2.3.7       
 [9] rmarkdown_2.25    knitr_1.44        jsonlite_1.8.4    xfun_0.39        
[13] digest_0.6.31     rlang_1.1.0       evaluate_0.22

# remove all objects if restarting script
rm(list=ls())

knitr::opts_knit$set(root.dir = "C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB")
getwd()

[1] "C:/Users/anune/Downloads"

# list all objects in your environment
ls()

character(0)

# set tibble width for printing - print all columns
options(tibble.width = Inf)

# remove scientific notation (by default will print)
options(scipen=999)

#================================================================#
# Packages
#================================================================#

# Here we must load the packages for the current environment to make them
# accessible. 

# load libraries
library(tidyverse)
library(emmeans)
library(car)
library(DT)
library(lme4)
library(lmerTest)

Final Homework

Read Data

Read weekly milk production in grams from Ewes of 2crosses subject to 3 diets

rm(list = ls())

library(readr)
library(stringr)


setwd("C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB")

sheep <- readxl::read_xlsx("C:\\Users\\anune\\OneDrive - Iowa State University\\ANS500AB\\sheep_milk.xlsx")


head(sheep)

# A tibble: 6 x 10
  cross  Diet   w_0   w_1   w_2   w_3   w_4   w_5   w_6   w_7
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     1  1260  1640  1080  1000   950  1000   900   800
2     1     1  1900  2570  1100  1520  1200  1300  1350  1250
3     1     1  1380  2760  1550   980  1380  1000  1300  1050
4     1     1  3910  3190  1050  1180   980   850  1100   950
5     1     1  2900  2440  1280  1540   740   800   600   500
6     1     2  2210  2330  1700  1180   890   850   750   600

datatable(sheep)

Transform data (10p)

Add a column called animal that contains the row number (2p)

Transform data to long format, including the following columns: cross, Diet, animal, week, milk (2p)

Add a column called indiv, that contains the animal, but as a factor. (2p) Add a column called period, that contains the week, but as a factor. (2p) Change the cross and Diet to factors too (2p)

library(tidyr)
sheep$animal <- seq_len(nrow(sheep))

head(sheep)

# A tibble: 6 x 11
  cross  Diet   w_0   w_1   w_2   w_3   w_4   w_5   w_6   w_7 animal
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <int>
1     1     1  1260  1640  1080  1000   950  1000   900   800      1
2     1     1  1900  2570  1100  1520  1200  1300  1350  1250      2
3     1     1  1380  2760  1550   980  1380  1000  1300  1050      3
4     1     1  3910  3190  1050  1180   980   850  1100   950      4
5     1     1  2900  2440  1280  1540   740   800   600   500      5
6     1     2  2210  2330  1700  1180   890   850   750   600      6

colnames(sheep)

 [1] "cross"  "Diet"   "w_0"    "w_1"    "w_2"    "w_3"    "w_4"    "w_5"   
 [9] "w_6"    "w_7"    "animal"

sheep_long <- tidyr::pivot_longer(sheep, cols = starts_with("w_"), 
                                  names_to = "week", values_to = "milk")


sheep_long$indiv <- as.factor(sheep_long$animal)


sheep_long$period <- as.factor(sheep_long$week)


sheep_long$cross <- as.factor(sheep_long$cross)

sheep_long$Diet <- as.factor(sheep_long$Diet)



num_animals <- length(unique(sheep_long$animal))

print(num_animals)

[1] 30

help("pivot_longer")

starting httpd help server ... done

head(sheep_long)

# A tibble: 6 x 7
  cross Diet  animal week   milk indiv period
  <fct> <fct>  <int> <chr> <dbl> <fct> <fct> 
1 1     1          1 w_0    1260 1     w_0   
2 1     1          1 w_1    1640 1     w_1   
3 1     1          1 w_2    1080 1     w_2   
4 1     1          1 w_3    1000 1     w_3   
5 1     1          1 w_4     950 1     w_4   
6 1     1          1 w_5    1000 1     w_5

datatable(sheep_long)

Represent the data (10 p)

Build a graphic with 3 panels, one panel per cross and add line plots of milk production as a function of week, but represent the three diets in different colors

sheep_long$week <- as.numeric(gsub("w_", "", sheep_long$week))

bar_colors <- c("#66c2a5", "#fc8d62", "#8da0cb") 
line_colors <- c("#e78ac3", "#a6d854", "#ffd92f") 

ggplot(sheep_long, aes(x = week, y = milk, fill = factor(Diet))) +
  geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8, alpha = 0.7, aes(color = factor(Diet))) +
  geom_line(aes(group = interaction(cross, Diet), color = factor(Diet)), position = position_dodge(width = 0.8), size = 0.8) +
  facet_wrap(~cross, scales = "free_y", ncol = 3) +  
  labs(title = "Milk Production as a Function of Week by Cross",
       x = "Week",
       y = "Milk Production (grams)",
       fill = "Diet",
       color = "Diet") +
  scale_fill_manual(values = bar_colors) +
  scale_color_manual(values = line_colors)

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
i Please use `linewidth` instead.

Full model

Fit a model that contains the three way interaction: of period, diet and cross for the fixed effects with an unstructured variance covariance between periods within individual. (10p)

Show the resulting variance covariance matrix (10p)

sheep_long$period <- as.numeric(gsub("w_", "", sheep_long$week))

library(nlme)


Attaching package: 'nlme'

The following object is masked from 'package:lme4':

    lmList

The following object is masked from 'package:dplyr':

    collapse

model_1 <- lme(milk ~ period * Diet * cross, 
               random = ~1 | indiv/period, 
               correlation = corSymm(form = ~1 | indiv),
               data = sheep_long)

Warning in lme.formula(milk ~ period * Diet * cross, random = ~1 |
indiv/period, : cannot use smaller level of grouping for 'correlation' than for
'random'. Replacing the former with the latter.

var_cov_matrix <- VarCorr(model_1)
print(var_cov_matrix)

            Variance     StdDev  
indiv =     pdLogChol(1)         
(Intercept)  11768.17    108.4812
period =    pdLogChol(1)         
(Intercept) 162998.82    403.7311
Residual     21444.11    146.4381

two reduced covariance models

Fit two more models (10p each) with reduced covariances, any model of your choice, but explain your choice. In both cases, show the resulting variance covariance matrix.

sheep_long$period <- factor(sheep_long$period, levels = 0:7)

model_2 <- lme(milk ~ period * Diet * cross, 
               random = ~1 | indiv, 
               data = sheep_long)


var_cov_matrix_2 <- VarCorr(model_2)

print(var_cov_matrix_2)

indiv = pdLogChol(1) 
            Variance  StdDev  
(Intercept)  21181.66 145.5392
Residual    109135.25 330.3562

model_3 <- lme(milk ~ period * Diet * cross, 
               random = ~1 | indiv/period, 
               data = sheep_long, 
               correlation = corAR1(form = ~1 | indiv))

Warning in lme.formula(milk ~ period * Diet * cross, random = ~1 |
indiv/period, : cannot use smaller level of grouping for 'correlation' than for
'random'. Replacing the former with the latter.

var_cov_matrix3 <- VarCorr(model_3)

print(var_cov_matrix3)

            Variance     StdDev  
indiv =     pdLogChol(1)         
(Intercept) 21181.61     145.5390
period =    pdLogChol(1)         
(Intercept) 94968.06     308.1689
Residual    14167.21     119.0261

###Explanation:

Model 2: This model includes a random intercept for each individual (random = ~1 | indiv), but it does not include any correlation between repeated measures within the same individual or across different periods.

Choice Rationale: This model assumes independence of observations within individuals and across periods, making it a simpler model compared to the one with a correlation structure.

Model 3: Now this model includes a random intercept for each individual and allows for a correlation structure among repeated measures within each individual (random = ~1 | indiv/period). The correlation structure is an AR(1) structure (corAR1(form = ~1 | indiv)), which models autocorrelation between observations within the same individual.

Choice Rationale: This model accounts for potential autocorrelation between consecutive periods within the same individual, providing a more complex but potentially more realistic representation of the data.

Both models offer different assumptions about the correlation structure within the data, and the choice between them depends on the underlying patterns and characteristics of your data. Model 3 may be preferred if there is reason to suspect autocorrelation within individuals across periods.

Compare the AIC of the three fitted models and select best model. (10 p)

aic_values <- c(AIC(model_1), AIC(model_2), AIC(model_3))

print(aic_values)

[1] 3505.671 2971.883 2975.883

best_model_index <- which.min(aic_values)
best_model <- list(model_1, model_2, model_3)[best_model_index]

print(best_model)

[[1]]
Linear mixed-effects model fit by REML
  Data: sheep_long 
  Log-restricted-likelihood: -1435.941
  Fixed: milk ~ period * Diet * cross 
         (Intercept)              period1              period2 
                2270                  250                -1058 
             period3              period4              period5 
               -1026                -1220                -1280 
             period6              period7                Diet2 
               -1220                -1360                 -338 
               Diet3               cross2        period1:Diet2 
               -1130                  920                 -508 
       period2:Diet2        period3:Diet2        period4:Diet2 
                 404                   28                  -84 
       period5:Diet2        period6:Diet2        period7:Diet2 
                -112                 -130                 -142 
       period1:Diet3        period2:Diet3        period3:Diet3 
                -462                  576                  290 
       period4:Diet3        period5:Diet3        period6:Diet3 
                 384                  354                  322 
       period7:Diet3       period1:cross2       period2:cross2 
                 380                 -174                 -594 
      period3:cross2       period4:cross2       period5:cross2 
                -816                 -858                 -810 
      period6:cross2       period7:cross2         Diet2:cross2 
                -970                 -754                 -786 
        Diet3:cross2 period1:Diet2:cross2 period2:Diet2:cross2 
                 -50                  -14                  380 
period3:Diet2:cross2 period4:Diet2:cross2 period5:Diet2:cross2 
                 686                  812                  764 
period6:Diet2:cross2 period7:Diet2:cross2 period1:Diet3:cross2 
                 808                  870                 -270 
period2:Diet3:cross2 period3:Diet3:cross2 period4:Diet3:cross2 
                  94                  310                  284 
period5:Diet3:cross2 period6:Diet3:cross2 period7:Diet3:cross2 
                 326                  518                  328 

Random effects:
 Formula: ~1 | indiv
        (Intercept) Residual
StdDev:    145.5392 330.3562

Number of Observations: 240
Number of Groups: 30

##Lower AIC values are better, Model 2 has the lowest AIC and is considered the best model among the three you compared.

ANOVA 10p

using the best model Perform an ANOVA to determine if there is a significant 3-way interaction: perioddietcross hint: use emmeans and join_tests, and specify: periodDietcross

library(lmerTest)

colnames(sheep_long)

[1] "cross"  "Diet"   "animal" "week"   "milk"   "indiv"  "period"

best_model_lmerTest <- lmerTest::lmer(milk ~ period * Diet * cross + (1 | indiv), data = sheep_long)

emmeans_lmerTest <- emmeans::emmeans(best_model_lmerTest, ~ period * Diet * cross)

joint_test_result_lmerTest <- emmeans::joint_tests(emmeans_lmerTest, hypothesis = "period * Diet * cross")

print(joint_test_result_lmerTest)

 model term        df1 df2 F.ratio p.value
 period              7 168  92.503  <.0001
 Diet                2  24  50.195  <.0001
 cross               1  24  15.124  0.0007
 period:Diet        14 168   4.711  <.0001
 period:cross        7 168   2.065  0.0500
 Diet:cross          2  24   2.880  0.0757
 period:Diet:cross  14 168   0.867  0.5954

mean comparison 20p

for each period, compare the three diets pairwise and write conclusions (10 p)

best_model

[[1]]
Linear mixed-effects model fit by REML
  Data: sheep_long 
  Log-restricted-likelihood: -1435.941
  Fixed: milk ~ period * Diet * cross 
         (Intercept)              period1              period2 
                2270                  250                -1058 
             period3              period4              period5 
               -1026                -1220                -1280 
             period6              period7                Diet2 
               -1220                -1360                 -338 
               Diet3               cross2        period1:Diet2 
               -1130                  920                 -508 
       period2:Diet2        period3:Diet2        period4:Diet2 
                 404                   28                  -84 
       period5:Diet2        period6:Diet2        period7:Diet2 
                -112                 -130                 -142 
       period1:Diet3        period2:Diet3        period3:Diet3 
                -462                  576                  290 
       period4:Diet3        period5:Diet3        period6:Diet3 
                 384                  354                  322 
       period7:Diet3       period1:cross2       period2:cross2 
                 380                 -174                 -594 
      period3:cross2       period4:cross2       period5:cross2 
                -816                 -858                 -810 
      period6:cross2       period7:cross2         Diet2:cross2 
                -970                 -754                 -786 
        Diet3:cross2 period1:Diet2:cross2 period2:Diet2:cross2 
                 -50                  -14                  380 
period3:Diet2:cross2 period4:Diet2:cross2 period5:Diet2:cross2 
                 686                  812                  764 
period6:Diet2:cross2 period7:Diet2:cross2 period1:Diet3:cross2 
                 808                  870                 -270 
period2:Diet3:cross2 period3:Diet3:cross2 period4:Diet3:cross2 
                  94                  310                  284 
period5:Diet3:cross2 period6:Diet3:cross2 period7:Diet3:cross2 
                 326                  518                  328 

Random effects:
 Formula: ~1 | indiv
        (Intercept) Residual
StdDev:    145.5392 330.3562

Number of Observations: 240
Number of Groups: 30

sheep_long

# A tibble: 240 x 7
   cross Diet  animal  week  milk indiv period
   <fct> <fct>  <int> <dbl> <dbl> <fct> <fct> 
 1 1     1          1     0  1260 1     0     
 2 1     1          1     1  1640 1     1     
 3 1     1          1     2  1080 1     2     
 4 1     1          1     3  1000 1     3     
 5 1     1          1     4   950 1     4     
 6 1     1          1     5  1000 1     5     
 7 1     1          1     6   900 1     6     
 8 1     1          1     7   800 1     7     
 9 1     1          2     0  1900 2     0     
10 1     1          2     1  2570 2     1     
# i 230 more rows

sheep_long$period <- factor(sheep_long$period, levels = 0:7)

model_2 <- lme(milk ~ period * Diet * cross, 
               random = ~1 | indiv, 
               data = sheep_long)
combined_emmeans_results <- emmeans(model_2, ~Diet | period)

NOTE: Results may be misleading due to involvement in interactions

pairwise_comparisons <- pairs(combined_emmeans_results)

summary(pairwise_comparisons)

period = 0:
 contrast estimate  SE df t.ratio p.value
 1 - 2         731 161 24   4.528  0.0004
 1 - 3        1155 161 24   7.154  <.0001
 2 - 3         424 161 24   2.626  0.0380

period = 1:
 contrast estimate  SE df t.ratio p.value
 1 - 2        1246 161 24   7.718  <.0001
 1 - 3        1752 161 24  10.852  <.0001
 2 - 3         506 161 24   3.134  0.0121

period = 2:
 contrast estimate  SE df t.ratio p.value
 1 - 2         137 161 24   0.849  0.6771
 1 - 3         532 161 24   3.295  0.0083
 2 - 3         395 161 24   2.447  0.0556

period = 3:
 contrast estimate  SE df t.ratio p.value
 1 - 2         360 161 24   2.230  0.0863
 1 - 3         710 161 24   4.398  0.0005
 2 - 3         350 161 24   2.168  0.0975

period = 4:
 contrast estimate  SE df t.ratio p.value
 1 - 2         409 161 24   2.533  0.0463
 1 - 3         629 161 24   3.896  0.0019
 2 - 3         220 161 24   1.363  0.3757

period = 5:
 contrast estimate  SE df t.ratio p.value
 1 - 2         461 161 24   2.856  0.0229
 1 - 3         638 161 24   3.952  0.0017
 2 - 3         177 161 24   1.096  0.5255

period = 6:
 contrast estimate  SE df t.ratio p.value
 1 - 2         457 161 24   2.831  0.0242
 1 - 3         574 161 24   3.555  0.0044
 2 - 3         117 161 24   0.725  0.7514

period = 7:
 contrast estimate  SE df t.ratio p.value
 1 - 2         438 161 24   2.713  0.0314
 1 - 3         611 161 24   3.785  0.0025
 2 - 3         173 161 24   1.072  0.5404

Results are averaged over the levels of: cross 
Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 3 estimates

###The estimated marginal means (emmeans) for each diet were calculated, representing the average response for each diet, adjusted for the potential influence of interactions. Pairwise Comparisons Across Diets:

Pairwise comparisons between diets were conducted across the 7-week period, revealing statistically significant differences. Diet 1 vs. Diet 2: Significant difference. Diet 1 vs. Diet 3: Significant difference. Diet 2 vs. Diet 3: Significant difference.

The degrees-of-freedom method used is “containment,” and the analysis was conducted at a 95% confidence level.

P-values from the pairwise comparisons were adjusted using Tukey’s method to control for multiple testing.

for each period, compare the two crosses and write conclusions (10 p)

##Across all periods, there are consistent differences in the variable of interest between Cross 1 and Cross 2. Statistically significant differences between the two crosses were observed in Periods 0, 1, and 7. In Periods 0 and 1, Cross 1 showed a significantly lower value compared to Cross 2. In Period 7, Cross 1 showed a significantly lower value compared to Cross 2. Non-Significant Differences:

In Periods 2, 3, 4, 5, and 6, there were no statistically significant differences between Cross 1 and Cross 2.

significance of differences vary across periods, suggesting potential period-specific dynamics influencing the variable of interest.

combined_emmeans_cross <- emmeans(model_2, ~cross | period)

NOTE: Results may be misleading due to involvement in interactions

pairwise_comparisons_cross <- pairs(combined_emmeans_cross)

summary(pairwise_comparisons_cross)

period = 0:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -641 132 24  -4.865  0.0001

period = 1:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -373 132 24  -2.827  0.0093

period = 2:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -205 132 24  -1.558  0.1324

period = 3:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -157 132 24  -1.194  0.2443

period = 4:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -149 132 24  -1.128  0.2705

period = 5:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -195 132 24  -1.477  0.1527

period = 6:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -113 132 24  -0.860  0.3984

period = 7:
 contrast estimate  SE df t.ratio p.value
 1 - 2        -287 132 24  -2.175  0.0397

Results are averaged over the levels of: Diet 
Degrees-of-freedom method: containment