Introduction

This project aims to analyze the relationship between feed intake and milk yield in dairy cows using a mixed-effects model with REML (Restricted Maximum Likelihood) for random effects and BLUE (Best Linear Unbiased Estimators) for fixed effects. We use synthetic data to simulate cow milk production, feed intake, and cow-specific characteristics like breed, age, and lactation stage.

Key Objectives:

  • Model milk yield based on feed intake using a linear mixed-effects model.
  • Estimate variance due to individual cow differences using REML.
  • Visualize relationships between variables and interpret model results.

Step 1: Load Libraries and Prepare Synthetic Data

# Load required libraries
Warning message:
package ‘RMySQL’ was built under R version 4.2.3 
library(lme4)  # For mixed-effects models and REML
Loading required package: Matrix
library(ggplot2)
Warning: package ‘ggplot2’ was built under R version 4.2.3
library(dplyr)
Warning: package ‘dplyr’ was built under R version 4.2.3
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tidyr)

Attaching package: ‘tidyr’

The following objects are masked from ‘package:Matrix’:

    expand, pack, unpack
# 1.1 Load open-source or synthetic data
# Here we generate synthetic data as an example
set.seed(123)

# Generate synthetic data for 100 cows over 365 days
n_cows <- 100
n_days <- 365
cow_ids <- rep(1:n_cows, each = n_days)

# Generate synthetic cow factors (breed, age, lactation stage)
breed <- sample(c("Holstein", "Jersey", "Guernsey"), n_cows, replace = TRUE)
age <- sample(3:8, n_cows, replace = TRUE)
lactation_stage <- sample(1:3, n_cows, replace = TRUE)

# Generate daily feed intake (kg), milk yield (liters), and individual variability
feed_protein <- rnorm(n_cows * n_days, mean = 16, sd = 2)
feed_fat <- rnorm(n_cows * n_days, mean = 4, sd = 1)
feed_energy <- rnorm(n_cows * n_days, mean = 2800, sd = 500)  # kcal

# Random effect: individual cow variability
cow_random_effect <- rnorm(n_cows, mean = 0, sd = 2)

# Milk yield (liters) with random effect for cow
milk_yield <- 25 + 0.5 * feed_protein + 0.3 * feed_fat + cow_random_effect[rep(1:n_cows, each = n_days)] + 
              rnorm(n_cows * n_days, mean = 0, sd = 5)

# Combine into a data frame
synthetic_data <- data.frame(
  cow_id = cow_ids,
  breed = rep(breed, each = n_days),
  age = rep(age, each = n_days),
  lactation_stage = rep(lactation_stage, each = n_days),
  feed_protein = feed_protein,
  feed_fat = feed_fat,
  feed_energy = feed_energy,
  milk_yield = milk_yield
)

# View the first few rows of the data
head(synthetic_data)
NA

Step 2: Exploratory Data Analysis (EDA)

# 2.1 Summary statistics
summary(synthetic_data)
     cow_id          breed                age       lactation_stage
 Min.   :  1.00   Length:36500       Min.   :3.00   Min.   :1.00   
 1st Qu.: 25.75   Class :character   1st Qu.:4.00   1st Qu.:1.00   
 Median : 50.50   Mode  :character   Median :6.00   Median :1.50   
 Mean   : 50.50                      Mean   :5.77   Mean   :1.71   
 3rd Qu.: 75.25                      3rd Qu.:7.00   3rd Qu.:2.00   
 Max.   :100.00                      Max.   :8.00   Max.   :3.00   
  feed_protein      feed_fat       feed_energy       milk_yield   
 Min.   : 7.89   Min.   :-0.166   Min.   : 552.2   Min.   :13.08  
 1st Qu.:14.64   1st Qu.: 3.329   1st Qu.:2458.0   1st Qu.:30.38  
 Median :15.97   Median : 3.999   Median :2798.9   Median :34.08  
 Mean   :15.99   Mean   : 4.002   Mean   :2798.5   Mean   :34.06  
 3rd Qu.:17.33   3rd Qu.: 4.679   3rd Qu.:3135.7   3rd Qu.:37.75  
 Max.   :23.72   Max.   : 8.090   Max.   :5423.4   Max.   :55.99  
# 2.2 Visualize milk yield by breed
ggplot(synthetic_data, aes(x = breed, y = milk_yield)) +
  geom_boxplot() +
  labs(title = "Milk Yield by Breed", x = "Breed", y = "Milk Yield (liters)")


# 2.3 Visualize the relationship between feed protein and milk yield
ggplot(synthetic_data, aes(x = feed_protein, y = milk_yield)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "Milk Yield vs Feed Protein Intake", x = "Feed Protein (kg)", y = "Milk Yield (liters)")

Step 3: Fit Mixed-Effects Model Using REML

We will use REML to fit a linear mixed-effects model that accounts for the random effect of individual cow variability and fixed effects like feed intake.

# 3.1 Fit a mixed-effects model using REML
# Random effect: cow-specific variation
# Fixed effects: feed_protein, feed_fat
model_reml <- lmer(milk_yield ~ feed_protein + feed_fat + (1 | cow_id), data = synthetic_data, REML = TRUE)

# 3.2 Model summary
summary(model_reml)
Linear mixed model fit by REML ['lmerMod']
Formula: milk_yield ~ feed_protein + feed_fat + (1 | cow_id)
   Data: synthetic_data

REML criterion at convergence: 221699.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.9714 -0.6788  0.0002  0.6776  4.1166 

Random effects:
 Groups   Name        Variance Std.Dev.
 cow_id   (Intercept)  3.099   1.760   
 Residual             25.161   5.016   
Number of obs: 36500, groups:  cow_id, 100

Fixed effects:
             Estimate Std. Error t value
(Intercept)  24.85800    0.29480   84.32
feed_protein  0.49356    0.01317   37.47
feed_fat      0.32652    0.02632   12.40

Correlation of Fixed Effects:
            (Intr) fd_prt
feed_proten -0.713       
feed_fat    -0.354 -0.005
# 3.3 Extract variance components (random effect)
VarCorr(model_reml)
 Groups   Name        Std.Dev.
 cow_id   (Intercept) 1.7603  
 Residual             5.0161  
# 3.4 Visualize fitted vs actual milk yield
synthetic_data$fitted_values <- fitted(model_reml)

ggplot(synthetic_data, aes(x = milk_yield, y = fitted_values)) +
  geom_point(alpha = 0.5, color = "green") +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  labs(title = "Actual vs Fitted Milk Yield", x = "Actual Milk Yield (liters)", y = "Fitted Milk Yield (liters)")

Step 4: Calculate BLUE for Fixed Effects

We will calculate the Best Linear Unbiased Estimator (BLUE) for the fixed effects in the model, which will give us unbiased estimates of the effects of feed intake on milk yield.

# 4.1 Fixed effects are already estimated by the model
fixed_effects <- fixef(model_reml)
print(fixed_effects)
 (Intercept) feed_protein     feed_fat 
  24.8579981    0.4935608    0.3265210 
# 4.2 BLUE values (fixed effect estimates)
# The fixed effect estimates are the BLUE for this mixed-effects model:
# These values represent the unbiased estimates of feed_protein and feed_fat effects

Step 5: Model Diagnostics and Interpretation

# 5.1 Residual plot to check model assumptions
plot(residuals(model_reml), main = "Residuals of Mixed-Effects Model")


# 5.2 Interpret the results
# The model shows how much feed protein and feed fat influence milk yield on average,
# while accounting for the random variation between cows.

# 5.3 Calculate random effect variance
random_effect_variance <- as.data.frame(VarCorr(model_reml))$vcov[2]  # Cow-specific variance
print(paste("Random Effect Variance (Cow):", round(random_effect_variance, 2)))
[1] "Random Effect Variance (Cow): 25.16"
LS0tCnRpdGxlOiAiUkVNTCBhbmQgTWl4ZWQtRWZmZWN0cyBNb2RlbDogTWlsayBZaWVsZCBBbmFseXNpcyIKYXV0aG9yOiAiSmViaW4gTGFyb3NoIEplcnZpcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgSW50cm9kdWN0aW9uCgpUaGlzIHByb2plY3QgYWltcyB0byBhbmFseXplIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBmZWVkIGludGFrZSBhbmQgbWlsayB5aWVsZCBpbiBkYWlyeSBjb3dzIHVzaW5nIGEgKiptaXhlZC1lZmZlY3RzIG1vZGVsKiogd2l0aCAqKlJFTUwgKFJlc3RyaWN0ZWQgTWF4aW11bSBMaWtlbGlob29kKSoqIGZvciByYW5kb20gZWZmZWN0cyBhbmQgKipCTFVFIChCZXN0IExpbmVhciBVbmJpYXNlZCBFc3RpbWF0b3JzKSoqIGZvciBmaXhlZCBlZmZlY3RzLiBXZSB1c2Ugc3ludGhldGljIGRhdGEgdG8gc2ltdWxhdGUgY293IG1pbGsgcHJvZHVjdGlvbiwgZmVlZCBpbnRha2UsIGFuZCBjb3ctc3BlY2lmaWMgY2hhcmFjdGVyaXN0aWNzIGxpa2UgYnJlZWQsIGFnZSwgYW5kIGxhY3RhdGlvbiBzdGFnZS4KCiMjIyBLZXkgT2JqZWN0aXZlczoKLSBNb2RlbCBtaWxrIHlpZWxkIGJhc2VkIG9uIGZlZWQgaW50YWtlIHVzaW5nIGEgbGluZWFyIG1peGVkLWVmZmVjdHMgbW9kZWwuCi0gRXN0aW1hdGUgdmFyaWFuY2UgZHVlIHRvIGluZGl2aWR1YWwgY293IGRpZmZlcmVuY2VzIHVzaW5nIFJFTUwuCi0gVmlzdWFsaXplIHJlbGF0aW9uc2hpcHMgYmV0d2VlbiB2YXJpYWJsZXMgYW5kIGludGVycHJldCBtb2RlbCByZXN1bHRzLgoKIyMgU3RlcCAxOiBMb2FkIExpYnJhcmllcyBhbmQgUHJlcGFyZSBTeW50aGV0aWMgRGF0YQoKYGBge3J9CiMgTG9hZCByZXF1aXJlZCBsaWJyYXJpZXMKbGlicmFyeShsbWU0KSAgIyBGb3IgbWl4ZWQtZWZmZWN0cyBtb2RlbHMgYW5kIFJFTUwKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHRpZHlyKQoKIyAxLjEgTG9hZCBvcGVuLXNvdXJjZSBvciBzeW50aGV0aWMgZGF0YQojIEhlcmUgd2UgZ2VuZXJhdGUgc3ludGhldGljIGRhdGEgYXMgYW4gZXhhbXBsZQpzZXQuc2VlZCgxMjMpCgojIEdlbmVyYXRlIHN5bnRoZXRpYyBkYXRhIGZvciAxMDAgY293cyBvdmVyIDM2NSBkYXlzCm5fY293cyA8LSAxMDAKbl9kYXlzIDwtIDM2NQpjb3dfaWRzIDwtIHJlcCgxOm5fY293cywgZWFjaCA9IG5fZGF5cykKCiMgR2VuZXJhdGUgc3ludGhldGljIGNvdyBmYWN0b3JzIChicmVlZCwgYWdlLCBsYWN0YXRpb24gc3RhZ2UpCmJyZWVkIDwtIHNhbXBsZShjKCJIb2xzdGVpbiIsICJKZXJzZXkiLCAiR3Vlcm5zZXkiKSwgbl9jb3dzLCByZXBsYWNlID0gVFJVRSkKYWdlIDwtIHNhbXBsZSgzOjgsIG5fY293cywgcmVwbGFjZSA9IFRSVUUpCmxhY3RhdGlvbl9zdGFnZSA8LSBzYW1wbGUoMTozLCBuX2Nvd3MsIHJlcGxhY2UgPSBUUlVFKQoKIyBHZW5lcmF0ZSBkYWlseSBmZWVkIGludGFrZSAoa2cpLCBtaWxrIHlpZWxkIChsaXRlcnMpLCBhbmQgaW5kaXZpZHVhbCB2YXJpYWJpbGl0eQpmZWVkX3Byb3RlaW4gPC0gcm5vcm0obl9jb3dzICogbl9kYXlzLCBtZWFuID0gMTYsIHNkID0gMikKZmVlZF9mYXQgPC0gcm5vcm0obl9jb3dzICogbl9kYXlzLCBtZWFuID0gNCwgc2QgPSAxKQpmZWVkX2VuZXJneSA8LSBybm9ybShuX2Nvd3MgKiBuX2RheXMsIG1lYW4gPSAyODAwLCBzZCA9IDUwMCkgICMga2NhbAoKIyBSYW5kb20gZWZmZWN0OiBpbmRpdmlkdWFsIGNvdyB2YXJpYWJpbGl0eQpjb3dfcmFuZG9tX2VmZmVjdCA8LSBybm9ybShuX2Nvd3MsIG1lYW4gPSAwLCBzZCA9IDIpCgojIE1pbGsgeWllbGQgKGxpdGVycykgd2l0aCByYW5kb20gZWZmZWN0IGZvciBjb3cKbWlsa195aWVsZCA8LSAyNSArIDAuNSAqIGZlZWRfcHJvdGVpbiArIDAuMyAqIGZlZWRfZmF0ICsgY293X3JhbmRvbV9lZmZlY3RbcmVwKDE6bl9jb3dzLCBlYWNoID0gbl9kYXlzKV0gKyAKICAgICAgICAgICAgICBybm9ybShuX2Nvd3MgKiBuX2RheXMsIG1lYW4gPSAwLCBzZCA9IDUpCgojIENvbWJpbmUgaW50byBhIGRhdGEgZnJhbWUKc3ludGhldGljX2RhdGEgPC0gZGF0YS5mcmFtZSgKICBjb3dfaWQgPSBjb3dfaWRzLAogIGJyZWVkID0gcmVwKGJyZWVkLCBlYWNoID0gbl9kYXlzKSwKICBhZ2UgPSByZXAoYWdlLCBlYWNoID0gbl9kYXlzKSwKICBsYWN0YXRpb25fc3RhZ2UgPSByZXAobGFjdGF0aW9uX3N0YWdlLCBlYWNoID0gbl9kYXlzKSwKICBmZWVkX3Byb3RlaW4gPSBmZWVkX3Byb3RlaW4sCiAgZmVlZF9mYXQgPSBmZWVkX2ZhdCwKICBmZWVkX2VuZXJneSA9IGZlZWRfZW5lcmd5LAogIG1pbGtfeWllbGQgPSBtaWxrX3lpZWxkCikKCiMgVmlldyB0aGUgZmlyc3QgZmV3IHJvd3Mgb2YgdGhlIGRhdGEKaGVhZChzeW50aGV0aWNfZGF0YSkKCmBgYAoKCiMjIFN0ZXAgMjogRXhwbG9yYXRvcnkgRGF0YSBBbmFseXNpcyAoRURBKQoKYGBge3J9CiMgMi4xIFN1bW1hcnkgc3RhdGlzdGljcwpzdW1tYXJ5KHN5bnRoZXRpY19kYXRhKQoKIyAyLjIgVmlzdWFsaXplIG1pbGsgeWllbGQgYnkgYnJlZWQKZ2dwbG90KHN5bnRoZXRpY19kYXRhLCBhZXMoeCA9IGJyZWVkLCB5ID0gbWlsa195aWVsZCkpICsKICBnZW9tX2JveHBsb3QoKSArCiAgbGFicyh0aXRsZSA9ICJNaWxrIFlpZWxkIGJ5IEJyZWVkIiwgeCA9ICJCcmVlZCIsIHkgPSAiTWlsayBZaWVsZCAobGl0ZXJzKSIpCgojIDIuMyBWaXN1YWxpemUgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIGZlZWQgcHJvdGVpbiBhbmQgbWlsayB5aWVsZApnZ3Bsb3Qoc3ludGhldGljX2RhdGEsIGFlcyh4ID0gZmVlZF9wcm90ZWluLCB5ID0gbWlsa195aWVsZCkpICsKICBnZW9tX3BvaW50KGFscGhhID0gMC41KSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgY29sb3IgPSAiYmx1ZSIpICsKICBsYWJzKHRpdGxlID0gIk1pbGsgWWllbGQgdnMgRmVlZCBQcm90ZWluIEludGFrZSIsIHggPSAiRmVlZCBQcm90ZWluIChrZykiLCB5ID0gIk1pbGsgWWllbGQgKGxpdGVycykiKQoKYGBgCgojIyBTdGVwIDM6IEZpdCBNaXhlZC1FZmZlY3RzIE1vZGVsIFVzaW5nIFJFTUwKV2Ugd2lsbCB1c2UgUkVNTCB0byBmaXQgYSBsaW5lYXIgbWl4ZWQtZWZmZWN0cyBtb2RlbCB0aGF0IGFjY291bnRzIGZvciB0aGUgcmFuZG9tIGVmZmVjdCBvZiBpbmRpdmlkdWFsIGNvdyB2YXJpYWJpbGl0eSBhbmQgZml4ZWQgZWZmZWN0cyBsaWtlIGZlZWQgaW50YWtlLgoKYGBge3J9CiMgMy4xIEZpdCBhIG1peGVkLWVmZmVjdHMgbW9kZWwgdXNpbmcgUkVNTAojIFJhbmRvbSBlZmZlY3Q6IGNvdy1zcGVjaWZpYyB2YXJpYXRpb24KIyBGaXhlZCBlZmZlY3RzOiBmZWVkX3Byb3RlaW4sIGZlZWRfZmF0Cm1vZGVsX3JlbWwgPC0gbG1lcihtaWxrX3lpZWxkIH4gZmVlZF9wcm90ZWluICsgZmVlZF9mYXQgKyAoMSB8IGNvd19pZCksIGRhdGEgPSBzeW50aGV0aWNfZGF0YSwgUkVNTCA9IFRSVUUpCgojIDMuMiBNb2RlbCBzdW1tYXJ5CnN1bW1hcnkobW9kZWxfcmVtbCkKCiMgMy4zIEV4dHJhY3QgdmFyaWFuY2UgY29tcG9uZW50cyAocmFuZG9tIGVmZmVjdCkKVmFyQ29ycihtb2RlbF9yZW1sKQoKIyAzLjQgVmlzdWFsaXplIGZpdHRlZCB2cyBhY3R1YWwgbWlsayB5aWVsZApzeW50aGV0aWNfZGF0YSRmaXR0ZWRfdmFsdWVzIDwtIGZpdHRlZChtb2RlbF9yZW1sKQoKZ2dwbG90KHN5bnRoZXRpY19kYXRhLCBhZXMoeCA9IG1pbGtfeWllbGQsIHkgPSBmaXR0ZWRfdmFsdWVzKSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUsIGNvbG9yID0gImdyZWVuIikgKwogIGdlb21fYWJsaW5lKHNsb3BlID0gMSwgaW50ZXJjZXB0ID0gMCwgY29sb3IgPSAicmVkIikgKwogIGxhYnModGl0bGUgPSAiQWN0dWFsIHZzIEZpdHRlZCBNaWxrIFlpZWxkIiwgeCA9ICJBY3R1YWwgTWlsayBZaWVsZCAobGl0ZXJzKSIsIHkgPSAiRml0dGVkIE1pbGsgWWllbGQgKGxpdGVycykiKQoKYGBgCiMjIFN0ZXAgNDogQ2FsY3VsYXRlIEJMVUUgZm9yIEZpeGVkIEVmZmVjdHMKV2Ugd2lsbCBjYWxjdWxhdGUgdGhlIEJlc3QgTGluZWFyIFVuYmlhc2VkIEVzdGltYXRvciAoQkxVRSkgZm9yIHRoZSBmaXhlZCBlZmZlY3RzIGluIHRoZSBtb2RlbCwgd2hpY2ggd2lsbCBnaXZlIHVzIHVuYmlhc2VkIGVzdGltYXRlcyBvZiB0aGUgZWZmZWN0cyBvZiBmZWVkIGludGFrZSBvbiBtaWxrIHlpZWxkLgoKYGBge3J9CiMgNC4xIEZpeGVkIGVmZmVjdHMgYXJlIGFscmVhZHkgZXN0aW1hdGVkIGJ5IHRoZSBtb2RlbApmaXhlZF9lZmZlY3RzIDwtIGZpeGVmKG1vZGVsX3JlbWwpCnByaW50KGZpeGVkX2VmZmVjdHMpCgojIDQuMiBCTFVFIHZhbHVlcyAoZml4ZWQgZWZmZWN0IGVzdGltYXRlcykKIyBUaGUgZml4ZWQgZWZmZWN0IGVzdGltYXRlcyBhcmUgdGhlIEJMVUUgZm9yIHRoaXMgbWl4ZWQtZWZmZWN0cyBtb2RlbDoKIyBUaGVzZSB2YWx1ZXMgcmVwcmVzZW50IHRoZSB1bmJpYXNlZCBlc3RpbWF0ZXMgb2YgZmVlZF9wcm90ZWluIGFuZCBmZWVkX2ZhdCBlZmZlY3RzCgpgYGAKCiMjIFN0ZXAgNTogTW9kZWwgRGlhZ25vc3RpY3MgYW5kIEludGVycHJldGF0aW9uCmBgYHtyfQojIDUuMSBSZXNpZHVhbCBwbG90IHRvIGNoZWNrIG1vZGVsIGFzc3VtcHRpb25zCnBsb3QocmVzaWR1YWxzKG1vZGVsX3JlbWwpLCBtYWluID0gIlJlc2lkdWFscyBvZiBNaXhlZC1FZmZlY3RzIE1vZGVsIikKCiMgNS4yIEludGVycHJldCB0aGUgcmVzdWx0cwojIFRoZSBtb2RlbCBzaG93cyBob3cgbXVjaCBmZWVkIHByb3RlaW4gYW5kIGZlZWQgZmF0IGluZmx1ZW5jZSBtaWxrIHlpZWxkIG9uIGF2ZXJhZ2UsCiMgd2hpbGUgYWNjb3VudGluZyBmb3IgdGhlIHJhbmRvbSB2YXJpYXRpb24gYmV0d2VlbiBjb3dzLgoKIyA1LjMgQ2FsY3VsYXRlIHJhbmRvbSBlZmZlY3QgdmFyaWFuY2UKcmFuZG9tX2VmZmVjdF92YXJpYW5jZSA8LSBhcy5kYXRhLmZyYW1lKFZhckNvcnIobW9kZWxfcmVtbCkpJHZjb3ZbMl0gICMgQ293LXNwZWNpZmljIHZhcmlhbmNlCnByaW50KHBhc3RlKCJSYW5kb20gRWZmZWN0IFZhcmlhbmNlIChDb3cpOiIsIHJvdW5kKHJhbmRvbV9lZmZlY3RfdmFyaWFuY2UsIDIpKSkKCmBgYAoK