library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(kableExtra)
library(scales)

theme_report <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#1a365d"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}
theme_set(theme_report())
# =============================================================================
# SET YOUR WORKING DIRECTORY - MODIFY IF NEEDED
# =============================================================================
working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Poultryaudit/Dataset"
setwd(working_dir)

# File paths for phenotypic data
pheno_file_3lines <- "Additional_Poultry dataset_audit.xlsx"
pheno_file_wil <- "Dataset_evaluation_corrected.xlsx"

# Base directory for WOMBAT results
wombat_base <- "Analysis_with_wombat2"
model_dir <- "Model1_Animal_YearMonth"
# =============================================================================
# LOAD PHENOTYPIC DATA FOR ALL 4 LINES
# =============================================================================

clean_names <- function(df) {
  colnames(df) <- gsub(" ", "_", colnames(df))
  return(df)
}

convert_types <- function(df) {
  df$Generation <- as.character(df$Generation)
  if ("Batch" %in% colnames(df)) df$Batch <- as.character(df$Batch)
  df$ID <- as.character(df$ID)
  if ("Sire_ID" %in% colnames(df)) df$Sire_ID <- as.character(df$Sire_ID)
  if ("Dam_ID" %in% colnames(df)) df$Dam_ID <- as.character(df$Dam_ID)
  return(df)
}

# Load LIL, BIL, NHIL from one file
lil_data <- read_excel(pheno_file_3lines, sheet = "LIL") %>% clean_names() %>% convert_types()
bil_data <- read_excel(pheno_file_3lines, sheet = "BIL") %>% clean_names() %>% convert_types()
nhil_data <- read_excel(pheno_file_3lines, sheet = "NHIL") %>% clean_names() %>% convert_types()

# Load WIL from separate file
wil_data <- read_excel(pheno_file_wil) %>% clean_names() %>% convert_types()

# Add Line identifier
lil_data$Line <- "LIL-20"
bil_data$Line <- "BIL-20"
nhil_data$Line <- "NHIL-20"
wil_data$Line <- "WIL-20"
# =============================================================================
# EXTRACT GENETIC PARAMETERS FROM WOMBAT OUTPUT
# =============================================================================

lines <- c("LIL", "BIL", "NHIL", "WIL")
all_vc <- list()
all_ebv <- list()

extract_wombat <- function(line_code) {
  sum_est_file <- file.path(wombat_base, line_code, model_dir, "SumEstimates.out")
  ebv_file <- file.path(wombat_base, line_code, model_dir, "RnSoln_animal.dat")
  id_map_file <- file.path(wombat_base, line_code, "ID_mapping.csv")
  
  result <- list(vc = NULL, ebv = NULL)
  
  if (!file.exists(sum_est_file)) {
    cat("  Warning:", line_code, "- SumEstimates.out not found\n")
    return(result)
  }
  
  # ----- EXTRACT VARIANCE COMPONENTS -----
  sum_lines <- readLines(sum_est_file)
  
  covs_a_idx <- grep("COVS A 1 1.*vrat", sum_lines)
  
  if (length(covs_a_idx) > 0) {
    covs_a_line <- sum_lines[covs_a_idx[1]]
    nums <- as.numeric(unlist(regmatches(covs_a_line, gregexpr("[0-9]+\\.?[0-9]*", covs_a_line))))
    
    if (length(nums) >= 7) {
      Va <- nums[4]
      Va_SE <- nums[5]
      h2 <- nums[6]
      h2_SE <- nums[7]
    }
  }
  
  covs_z_idx <- grep("COVS Z 1 1.*vrat", sum_lines)
  if (length(covs_z_idx) > 0) {
    covs_z_line <- sum_lines[covs_z_idx[1]]
    nums_z <- as.numeric(unlist(regmatches(covs_z_line, gregexpr("[0-9]+\\.?[0-9]*", covs_z_line))))
    if (length(nums_z) >= 5) {
      Ve <- nums_z[4]
      Ve_SE <- nums_z[5]
    }
  }
  
  covs_t_idx <- grep("COVS T 1 1", sum_lines)
  if (length(covs_t_idx) > 0) {
    covs_t_line <- sum_lines[covs_t_idx[1]]
    nums_t <- as.numeric(unlist(regmatches(covs_t_line, gregexpr("[0-9]+\\.?[0-9]*", covs_t_line))))
    if (length(nums_t) >= 5) {
      Vp <- nums_t[4]
    }
  }
  
  rec_idx <- grep("No. of records", sum_lines)
  if (length(rec_idx) > 0) {
    rec_line <- sum_lines[rec_idx[1]]
    nums_rec <- as.numeric(unlist(regmatches(rec_line, gregexpr("[0-9]+", rec_line))))
    N_rec <- nums_rec[1]
  }
  
  if (exists("h2")) {
    result$vc <- data.frame(
      Line = paste0(line_code, "-20"),
      N = ifelse(exists("N_rec"), N_rec, NA),
      Va = ifelse(exists("Va"), round(Va, 1), NA),
      Va_SE = ifelse(exists("Va_SE"), round(Va_SE, 1), NA),
      Ve = ifelse(exists("Ve"), round(Ve, 1), NA),
      Ve_SE = ifelse(exists("Ve_SE"), round(Ve_SE, 1), NA),
      Vp = ifelse(exists("Vp"), round(Vp, 1), NA),
      h2 = round(h2, 3),
      h2_SE = round(h2_SE, 3)
    )
  }
  
  # ----- EXTRACT EBVs -----
  if (file.exists(ebv_file) && file.exists(id_map_file)) {
    ebv_lines <- readLines(ebv_file)
    header_idx <- grep("Run_No", ebv_lines)
    
    if (length(header_idx) > 0) {
      data_start <- header_idx[1] + 1
      data_lines <- ebv_lines[data_start:length(ebv_lines)]
      data_lines <- data_lines[nchar(trimws(data_lines)) > 0]
      
      ebv_data <- read.table(text = data_lines, header = FALSE, fill = TRUE)
      
      if (ncol(ebv_data) >= 7) {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "SE_EBV", "Accuracy", "Inbreeding_pct")
      } else {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "Inbreeding_pct")[1:ncol(ebv_data)]
      }
      
      id_map <- read.csv(id_map_file, stringsAsFactors = FALSE)
      ebv_merged <- merge(ebv_data, id_map, by.x = "ID_num", by.y = "ID_num", all.x = TRUE)
      
      result$ebv <- data.frame(
        Line = paste0(line_code, "-20"),
        ID_original = as.character(ebv_merged$ID_orig),
        ID_wombat = ebv_merged$ID_num,
        EBV = ebv_merged$EBV,
        SE_EBV = if ("SE_EBV" %in% colnames(ebv_merged)) ebv_merged$SE_EBV else NA,
        Accuracy = if ("Accuracy" %in% colnames(ebv_merged)) ebv_merged$Accuracy else NA,
        Inbreeding_pct = if ("Inbreeding_pct" %in% colnames(ebv_merged)) ebv_merged$Inbreeding_pct else NA
      )
    }
  }
  
  return(result)
}

for (ln in lines) {
  res <- extract_wombat(ln)
  if (!is.null(res$vc)) all_vc[[ln]] <- res$vc
  if (!is.null(res$ebv)) all_ebv[[ln]] <- res$ebv
}

vc_summary <- do.call(rbind, all_vc)
ebv_all <- do.call(rbind, all_ebv)
# =============================================================================
# COMPUTE PHENOTYPIC SUMMARY STATISTICS
# =============================================================================

compute_pheno_stats <- function(data, line_name) {
  total <- nrow(data)
  g0 <- sum(data$Generation == "0", na.rm = TRUE)
  g1 <- sum(data$Generation == "1", na.rm = TRUE)
  g2 <- sum(data$Generation == "2", na.rm = TRUE)
  
  deaths <- sum(!is.na(data$Cull_Reason) & data$Cull_Reason != "", na.rm = TRUE)
  survival <- round((1 - deaths / total) * 100, 1)
  
  bw16_n <- sum(!is.na(data$BW_16wk))
  bw16_mean <- round(mean(data$BW_16wk, na.rm = TRUE), 0)
  bw16_sd <- round(sd(data$BW_16wk, na.rm = TRUE), 0)
  bw16_min <- round(min(data$BW_16wk, na.rm = TRUE), 0)
  bw16_max <- round(max(data$BW_16wk, na.rm = TRUE), 0)
  bw16_cv <- round(bw16_sd / bw16_mean * 100, 1)
  
  male_bw <- data %>% filter(Sex == "M") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  female_bw <- data %>% filter(Sex == "F") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  dimorphism <- round((male_bw - female_bw) / female_bw * 100, 1)
  
  afe_col <- colnames(data)[grepl("AFE", colnames(data), ignore.case = TRUE)][1]
  if (!is.na(afe_col)) {
    egg_n <- sum(!is.na(data[[afe_col]]))
  } else {
    egg_n <- 0
  }
  
  n_females <- sum(data$Sex == "F", na.rm = TRUE)
  egg_pct <- ifelse(n_females > 0, round(egg_n / n_females * 100, 1), 0)
  
  data.frame(
    Line = line_name,
    Total = total,
    G0 = g0,
    G1 = g1,
    G2 = g2,
    Survival_Pct = survival,
    BW16_N = bw16_n,
    BW16_Mean = bw16_mean,
    BW16_SD = bw16_sd,
    BW16_Min = bw16_min,
    BW16_Max = bw16_max,
    BW16_CV = bw16_cv,
    Male_BW16 = male_bw,
    Female_BW16 = female_bw,
    Dimorphism_Pct = dimorphism,
    Egg_N = egg_n,
    Egg_Pct_Females = egg_pct
  )
}

pheno_stats <- bind_rows(
  compute_pheno_stats(lil_data, "LIL-20"),
  compute_pheno_stats(bil_data, "BIL-20"),
  compute_pheno_stats(nhil_data, "NHIL-20"),
  compute_pheno_stats(wil_data, "WIL-20")
)

ebv_stats <- ebv_all %>%
  group_by(Line) %>%
  summarise(
    N = n(),
    Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
    SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
    Min_EBV = round(min(EBV, na.rm = TRUE), 1),
    Max_EBV = round(max(EBV, na.rm = TRUE), 1),
    Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
    N_Inbred = sum(Inbreeding_pct > 0, na.rm = TRUE),
    .groups = "drop"
  )

total_birds <- sum(pheno_stats$Total)

1 Executive Summary

This report presents descriptive statistics and genetic analysis results for four ILRI poultry lines: LIL-20, BIL-20, NHIL-20, and WIL-20. The evaluation encompasses phenotypic characterization across three generations (G0, G1, G2) and genetic parameter estimation using WOMBAT software for body weight at 16 weeks.

3,790
Total Birds
All 4 Lines Combined
1215
LIL-20
797
BIL-20
1116
NHIL-20
662
WIL-20

๐Ÿ“‹ Internal Audit Purpose

This genetic evaluation was initiated for internal audit purposes to assess the current status of ILRIโ€™s poultry breeding program. The analysis establishes baseline genetic parameters and identifies areas for improvement. As the program matures and more data becomes available, this evaluation process will be updated in real time to track genetic progress and inform breeding decisions.

Current Scope:

  • Single trait analysis: Body weight at 16 weeks (BW_16wk)
  • Three generations evaluated: G0 (foundation), G1, and G2
  • Four breeding lines: LIL-20, BIL-20, NHIL-20, and WIL-20

๐Ÿ“… Recommended Evaluation Schedule

To ensure continuous genetic progress and timely selection decisions, this genetic evaluation should be conducted twice per year (biannually). Regular evaluations enable:

  • Tracking of genetic trends over time
  • Identification of top breeding candidates before mating decisions
  • Early detection of any issues in the breeding structure
  • Updated EBV rankings for informed culling and selection

2 Phenotypic Summary

2.1 Population Structure

pheno_stats %>%
  select(Line, Total, G0, G1, G2, Survival_Pct) %>%
  bind_rows(
    data.frame(
      Line = "TOTAL",
      Total = sum(pheno_stats$Total),
      G0 = sum(pheno_stats$G0),
      G1 = sum(pheno_stats$G1),
      G2 = sum(pheno_stats$G2),
      Survival_Pct = round(mean(pheno_stats$Survival_Pct), 1)
    )
  ) %>%
  kable(col.names = c("Line", "Total Birds", "G0", "G1", "G2", "Survival %"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white") %>%
  row_spec(5, bold = TRUE, background = "#f7fafc")
Line Total Birds G0 G1 G2 Survival %
LIL-20 1215 612 157 446 74.7
BIL-20 797 300 155 342 92.8
NHIL-20 1116 721 151 244 62.5
WIL-20 662 264 92 306 94.9
TOTAL 3790 1897 555 1338 81.2
pheno_stats %>%
  select(Line, G0, G1, G2) %>%
  pivot_longer(-Line, names_to = "Generation", values_to = "N") %>%
  ggplot(aes(x = Line, y = N, fill = Generation)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = N), position = position_dodge(width = 0.7), vjust = -0.5, size = 3.5) +
  scale_fill_manual(values = c("G0" = "#3182ce", "G1" = "#38a169", "G2" = "#805ad5")) +
  labs(title = "Population Structure by Line and Generation",
       x = "Line", y = "Number of Birds") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)))

2.2 Body Weight at 16 Weeks

pheno_stats %>%
  select(Line, BW16_N, BW16_Mean, BW16_SD, BW16_Min, BW16_Max, BW16_CV) %>%
  kable(col.names = c("Line", "N", "Mean (g)", "SD (g)", "Min (g)", "Max (g)", "CV %"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")
Line N Mean (g) SD (g) Min (g) Max (g) CV %
LIL-20 793 981 224 231 1535 22.8
BIL-20 686 1354 281 392 2254 20.8
NHIL-20 641 1196 409 336 2204 34.2
WIL-20 601 1148 272 359 1875 23.7
all_pheno <- bind_rows(
  lil_data %>% select(Line, BW_16wk),
  bil_data %>% select(Line, BW_16wk),
  nhil_data %>% select(Line, BW_16wk),
  wil_data %>% select(Line, BW_16wk)
)

ggplot(all_pheno %>% filter(!is.na(BW_16wk)), aes(x = Line, y = BW_16wk, fill = Line)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
  labs(title = "Body Weight at 16 Weeks - Comparison Across Lines",
       x = "Line", y = "Body Weight (g)") +
  theme(legend.position = "none")

2.3 Phenotypic Trend

pheno_trend <- bind_rows(
  lil_data %>% select(Line, Generation, BW_16wk),
  bil_data %>% select(Line, Generation, BW_16wk),
  nhil_data %>% select(Line, Generation, BW_16wk),
  wil_data %>% select(Line, Generation, BW_16wk)
) %>%
  filter(!is.na(BW_16wk)) %>%
  group_by(Line, Generation) %>%
  summarise(
    N = n(),
    Mean_BW16 = mean(BW_16wk, na.rm = TRUE),
    SE_BW16 = sd(BW_16wk, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation))
pheno_trend_wide <- pheno_trend %>%
  select(Line, Generation, N, Mean_BW16) %>%
  mutate(Mean_BW16 = round(Mean_BW16, 0)) %>%
  pivot_wider(names_from = Generation, values_from = c(N, Mean_BW16), names_sep = "_")

pheno_trend_wide %>%
  kable(align = c("l", rep("r", ncol(pheno_trend_wide) - 1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")
Line N_G0 N_G1 N_G2 Mean_BW16_G0 Mean_BW16_G1 Mean_BW16_G2
BIL-20 269 124 293 1422 1346 1294
LIL-20 295 135 363 933 949 1031
NHIL-20 334 91 216 1062 1171 1415
WIL-20 237 87 277 1085 1112 1214
ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Mean_BW16 - SE_BW16, ymax = Mean_BW16 + SE_BW16), 
                width = 0.1, linewidth = 0.8) +
  geom_text(aes(label = round(Mean_BW16, 0)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
  scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                 "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
  labs(title = "Phenotypic Trend: Mean Body Weight at 16 Weeks by Generation",
       subtitle = "Error bars represent ยฑ 1 standard error",
       x = "Generation", y = "Mean Body Weight (g)", color = "Line") +
  theme(legend.position = "bottom") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))

๐Ÿ“ˆ Phenotypic Trend Interpretation

The phenotypic trend shows the change in mean body weight at 16 weeks across generations. An upward trend suggests phenotypic improvement, though this includes both genetic and environmental effects. Note that G0 represents foundation stock, while G1 and G2 are subsequent breeding generations.

2.4 Sexual Dimorphism

pheno_stats %>%
  select(Line, Male_BW16, Female_BW16, Dimorphism_Pct) %>%
  kable(col.names = c("Line", "Male BW_16wk (g)", "Female BW_16wk (g)", "Dimorphism (%)"),
        align = c("l", rep("r", 3))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
Line Male BW_16wk (g) Female BW_16wk (g) Dimorphism (%)
LIL-20 1125 866 29.9
BIL-20 1494 1200 24.5
NHIL-20 1427 995 43.4
WIL-20 1282 996 28.7

3 Genetic Analysis

3.1 Variance Components and Heritability

โ„น๏ธ Analysis Method

Genetic parameters were estimated using WOMBAT software with an animal model. The model included the following fixed effects:

  • Sex (male/female)
  • Batch (hatching batch)
  • Year (year of hatch)
  • Month (month of hatch)

The trait analyzed is body weight at 16 weeks (BW_16wk).

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  vc_summary %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "hยฒ", "SE(hยฒ)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white") %>%
    row_spec(which(vc_summary$h2 > 0.6), bold = TRUE, background = "#fed7d7")
} else {
  cat("WOMBAT results not available. Please ensure Analysis_with_wombat2 folder contains completed analyses.\n")
}
Line N Va SE(Va) Ve SE(Ve) Vp hยฒ SE(hยฒ)
LIL LIL-20 787 19579.0 4163.9 14212.6 2787.6 33791.6 0.579 0.097
BIL BIL-20 685 12995.1 5250.1 30647.0 4169.6 43642.2 0.298 0.110
NHIL NHIL-20 638 89703.6 0.0 28389.8 4796.9 118093.0 0.760 0.031
WIL WIL-20 599 15082.8 5071.0 24419.8 3824.7 39502.6 0.382 0.112
if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  ggplot(vc_summary, aes(x = Line, y = h2, fill = Line)) +
    geom_bar(stat = "identity", width = 0.6) +
    geom_errorbar(aes(ymin = h2 - h2_SE, ymax = h2 + h2_SE), width = 0.2) +
    geom_text(aes(label = h2), vjust = -0.8, size = 5, fontface = "bold") +
    geom_hline(yintercept = 0.45, linetype = "dashed", color = "orange", linewidth = 1) +
    annotate("text", x = 0.6, y = 0.47, label = "Typical upper range (hยฒ = 0.45)", hjust = 0, color = "orange", size = 3.5) +
    scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                  "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Heritability Estimates for Body Weight at 16 Weeks",
         subtitle = "Error bars show ยฑ 1 SE | Dashed line indicates typical upper range",
         x = "Line", y = "Heritability (hยฒ)") +
    theme(legend.position = "none") +
    scale_y_continuous(limits = c(0, max(vc_summary$h2 + vc_summary$h2_SE, na.rm = TRUE) * 1.2),
                       expand = expansion(mult = c(0, 0.05)))
}

3.2 EBV Summary Statistics

if (!is.null(ebv_stats) && nrow(ebv_stats) > 0) {
  ebv_stats %>%
    kable(col.names = c("Line", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)", "Mean F%", "N Inbred"),
          align = c("l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
} else {
  cat("EBV data not available.\n")
}
Line N Mean EBV (g) SD (g) Min (g) Max (g) Mean F% N Inbred
BIL-20 685 35.7 67.7 -217.2 207.8 1.37 75
LIL-20 798 71.1 111.9 -307.5 315.8 0.64 26
NHIL-20 641 175.6 298.0 -668.4 842.7 1.33 59
WIL-20 600 57.4 95.8 -243.0 293.7 1.17 46
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ggplot(ebv_all, aes(x = EBV, fill = Line)) +
    geom_histogram(bins = 30, alpha = 0.8, color = "white") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
    facet_wrap(~Line, scales = "free_y") +
    scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                  "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Red dashed line indicates EBV = 0",
         x = "EBV (g)", y = "Count") +
    theme(legend.position = "none")
}

3.3 Genetic Trend

all_pheno_with_id <- bind_rows(
  lil_data %>% select(Line, ID, Generation, BW_16wk),
  bil_data %>% select(Line, ID, Generation, BW_16wk),
  nhil_data %>% select(Line, ID, Generation, BW_16wk),
  wil_data %>% select(Line, ID, Generation, BW_16wk)
)

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_with_gen <- ebv_all %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(all_pheno_with_id %>% select(Line, ID, Generation), 
              by = c("Line" = "Line", "ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  genetic_trend <- ebv_with_gen %>%
    group_by(Line, Generation) %>%
    summarise(
      N = n(),
      Mean_EBV = mean(EBV, na.rm = TRUE),
      SE_EBV = sd(EBV, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    ) %>%
    mutate(Generation = paste0("G", Generation))
}
if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  n_wide <- genetic_trend %>%
    select(Line, Generation, N) %>%
    pivot_wider(names_from = Generation, values_from = N, names_prefix = "N_")
  
  ebv_wide <- genetic_trend %>%
    select(Line, Generation, Mean_EBV) %>%
    mutate(Mean_EBV = round(Mean_EBV, 1)) %>%
    pivot_wider(names_from = Generation, values_from = Mean_EBV, names_prefix = "Mean_EBV_")
  
  genetic_trend_wide <- n_wide %>%
    left_join(ebv_wide, by = "Line")
  
  gen_cols <- sort(unique(genetic_trend$Generation))
  col_order <- c("Line")
  for (g in gen_cols) {
    col_order <- c(col_order, paste0("N_", g), paste0("Mean_EBV_", g))
  }
  col_order <- col_order[col_order %in% colnames(genetic_trend_wide)]
  genetic_trend_wide <- genetic_trend_wide %>% select(all_of(col_order))
  
  genetic_trend_wide %>%
    kable(align = c("l", rep("r", ncol(genetic_trend_wide) - 1))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}
Line N_G0 Mean_EBV_G0 N_G1 Mean_EBV_G1 N_G2 Mean_EBV_G2
BIL-20 269 0.0 124 48.2 292 63.4
LIL-20 291 -2.7 134 68.4 362 131.4
NHIL-20 333 -3.0 93 278.5 215 407.7
WIL-20 237 -0.9 87 62.4 275 105.5
if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) +
    geom_point(size = 4) +
    geom_errorbar(aes(ymin = Mean_EBV - SE_EBV, ymax = Mean_EBV + SE_EBV), 
                  width = 0.1, linewidth = 0.8) +
    geom_text(aes(label = round(Mean_EBV, 1)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Genetic Trend: Mean EBV for Body Weight at 16 Weeks by Generation",
         subtitle = "Error bars represent ยฑ 1 standard error | Dashed line indicates EBV = 0",
         x = "Generation", y = "Mean EBV (g)", color = "Line") +
    theme(legend.position = "bottom") +
    scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)))
}

if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  library(gridExtra)
  
  p1 <- ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Phenotypic Trend",
         x = "Generation", y = "Mean BW_16wk (g)") +
    theme(legend.position = "none")
  
  p2 <- ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Genetic Trend",
         x = "Generation", y = "Mean EBV (g)") +
    theme(legend.position = "bottom")
  
  grid.arrange(p1, p2, ncol = 2, 
               top = grid::textGrob("Phenotypic vs Genetic Trend Comparison", 
                                    gp = grid::gpar(fontsize = 14, fontface = "bold")))
}

๐Ÿ“Š Genetic Trend Interpretation

The genetic trend shows the change in mean Estimated Breeding Value (EBV) across generations, representing the genetic progress achieved through selection.

Key points:

  • Positive trend (upward slope): Indicates genetic improvement in body weight at 16 weeks
  • G0 as baseline: Foundation stock typically centers around EBV = 0
  • G1 and G2 progress: Positive mean EBVs in later generations indicate selection has been effective
  • Difference from phenotypic trend: Genetic trend isolates the genetic component of change, removing environmental effects

3.4 Top Breeding Animals - Selection Candidates

๐Ÿ† Elite Breeding Stock Identification

The table below identifies the top 10 animals per line based on their Estimated Breeding Values (EBVs) for body weight at 16 weeks. These individuals represent the genetic elite of each line and are prime candidates for retention as breeding sires and dams for the next generation.

Selection Question: Which individuals should be kept as breeding animals?

The animals listed below have the highest genetic merit for body weight. Selecting these individuals as parents of the next generation will maximize genetic progress while maintaining genetic diversity.

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_animals <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    mutate(Rank = row_number()) %>%
    ungroup() %>%
    select(Line, Rank, ID_original, EBV, Accuracy, Inbreeding_pct) %>%
    mutate(
      EBV = round(EBV, 1),
      Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2)),
      Inbreeding_pct = ifelse(is.na(Inbreeding_pct), "-", round(Inbreeding_pct, 2))
    )
  
  top_animals %>%
    kable(col.names = c("Line", "Rank", "Animal ID", "EBV (g)", "Accuracy", "Inbreeding %"),
          align = c("l", "c", "l", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(which(top_animals$Rank == 1), bold = TRUE, background = "#c6f6d5")
}
Line Rank Animal ID EBV (g) Accuracy Inbreeding %
BIL-20 1 C-7908 207.8 0.71 12.5
BIL-20 2 1222 204.3 0.54 0.0
BIL-20 3 C-7918 192.8 0.65 0.0
BIL-20 4 B-3407 189.9 0.78 0.0
BIL-20 5 C-7943 188.5 0.66 0.0
BIL-20 6 C-3263 184.5 0.66 0.0
BIL-20 7 1426 183.2 0.54 0.0
BIL-20 8 C-7075 178.4 0.71 12.5
BIL-20 9 1287 174.6 0.54 0.0
BIL-20 10 B-3091 169.2 0.68 0.0
LIL-20 1 C-7742 315.8 0.80 0.0
LIL-20 2 C-7562 299.9 0.81 0.0
LIL-20 3 C-7660 297.5 0.81 0.0
LIL-20 4 C-7565 297.0 0.81 0.0
LIL-20 5 C-8084 292.5 0.80 0.0
LIL-20 6 C-8158 285.6 0.81 0.0
LIL-20 7 C-8142 283.4 0.80 0.0
LIL-20 8 C-8147 281.1 0.81 0.0
LIL-20 9 C-7719 277.3 0.80 0.0
LIL-20 10 B-3323 276.3 0.91 0.0
NHIL-20 1 C-7171 842.7 0.90 25.0
NHIL-20 2 C-7136 842.5 0.89 12.5
NHIL-20 3 C-7178 805.5 0.87 0.0
NHIL-20 4 C-7180 784.1 0.87 0.0
NHIL-20 5 C-7981 774.7 0.89 12.5
NHIL-20 6 C-7137 756.8 0.89 12.5
NHIL-20 7 C-7128 727.8 0.87 0.0
NHIL-20 8 C-7181 724.8 0.87 0.0
NHIL-20 9 2384 717.4 0.87 0.0
NHIL-20 10 C-7966 693.7 0.87 0.0
WIL-20 1 C-7498 293.7 0.71 0.0
WIL-20 2 B-3346 288.2 0.78 0.0
WIL-20 3 C-8280 287.2 0.72 0.0
WIL-20 4 C-7497 277.2 0.71 0.0
WIL-20 5 C-7435 276.1 0.72 0.0
WIL-20 6 B-3236 269.1 0.86 0.0
WIL-20 7 C-3153 265.1 0.71 0.0
WIL-20 8 C-7510 263.7 0.72 0.0
WIL-20 9 B-3368 262.6 0.78 0.0
WIL-20 10 C-8283 261.2 0.72 0.0
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_summary <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    summarise(
      Top10_Mean_EBV = round(mean(EBV), 1),
      Top10_Min_EBV = round(min(EBV), 1),
      Top10_Max_EBV = round(max(EBV), 1),
      Selection_Differential = round(mean(EBV) - 0, 1),
      .groups = "drop"
    )
  
  cat("\n")
  top_summary %>%
    kable(col.names = c("Line", "Mean EBV (Top 10)", "Min EBV", "Max EBV", "Selection Differential (g)"),
          align = c("l", rep("r", 4))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
}
Line Mean EBV (Top 10) Min EBV Max EBV Selection Differential (g)
BIL-20 187.3 169.2 207.8 187.3
LIL-20 290.6 276.3 315.8 290.6
NHIL-20 767.0 693.7 842.7 767.0
WIL-20 274.4 261.2 293.7 274.4

4 Egg Production Traits

๐Ÿšซ Egg Traits Not Analyzed Genetically

Genetic analysis of egg production traits (age at first egg, egg number at 45 weeks, egg weight) was not conducted due to insufficient data for reliable variance component estimation.

4.1 Data Availability

pheno_stats %>%
  select(Line, Egg_N, Egg_Pct_Females) %>%
  mutate(Status = ifelse(Egg_N < 100, "Insufficient", "Available")) %>%
  kable(col.names = c("Line", "Egg Records (N)", "% of Females", "Status"),
        align = c("l", rep("r", 2), "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#e53e3e", color = "white")
Line Egg Records (N) % of Females Status
LIL-20 46 9.7 Insufficient
BIL-20 43 12.3 Insufficient
NHIL-20 31 8.6 Insufficient
WIL-20 28 9.7 Insufficient

4.2 Reasons for Excluding Egg Traits

  1. Sample size limitations: With fewer than 100 records per line in most cases, variance component estimation would produce unreliable and imprecise heritability estimates.

  2. Pedigree connectivity: Limited number of dam families with egg records reduces the ability to separate genetic from environmental effects.

  3. Generational coverage: Egg data is primarily available from G1 females, with G2 recording still in progress.

  4. Standard error concerns: Expected standard errors would exceed acceptable thresholds for publication-quality estimates.


5 Limitations

โš ๏ธ Important Limitations to Consider

The following limitations should be considered when interpreting the results of this evaluation:

  1. Small sample sizes: Population sizes range from 662 to 1215 animals per line, which limits statistical power and may affect variance component estimates.

  2. Single trait analysis: The current evaluation focuses only on body weight at 16 weeks. Other economically important traits (egg production, survival, feed efficiency) require additional data collection before they can be included in a comprehensive selection index.

  3. Limited pedigree depth: With only three generations (G0, G1, G2), the pedigree structure provides limited information for genetic evaluation. G0 founders have unknown parents by design.

  4. Limited relatedness: The number of unique sires and dams is relatively small, creating unbalanced family structures and potentially affecting heritability estimates.

  5. No genomic data: Pedigree-based relationships may not fully capture actual genetic relationships, and genomic selection is not yet possible.


6 Conclusions and Future Directions

6.1 Key Findings

โœ… Summary of Results

Measure Finding
Total birds evaluated 3,790 across 4 lines
Generations included G0, G1, and G2
Survival rates All lines > 92%
Mean BW at 16 weeks 981g to 1354g
Heritability range 0.298 to 0.76
Genetic trend Positive for most lines from G0 to G2

๐Ÿ“‹ Note on Selection Trait

The current breeding program focuses on a single selection trait: body weight at 16 weeks (BW_16wk). This trait was chosen as the primary selection criterion due to its economic importance and ease of measurement. While egg production traits are being recorded, they are not yet incorporated into the breeding objective due to insufficient data for reliable genetic evaluation.

6.3 Value for Stakeholders

๐Ÿค Potential Future Applications

As this breeding program matures and more comprehensive data becomes available, the genetic evaluation framework can provide value for various stakeholders:

For Internal Program Management:

  • Evidence-based selection decisions
  • Tracking of genetic progress over time
  • Identification of breeding structure issues

For Future Partnerships:

  • Transparent documentation of genetic improvement
  • Genetically evaluated breeding stock with EBV records
  • Foundation for collaboration with other breeding programs

For Capacity Building:

  • Training opportunities in quantitative genetics methods
  • Development of local expertise in genetic evaluation
  • Replicable framework for other species/programs