library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(kableExtra)
library(scales)

theme_report <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#1a365d"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}
theme_set(theme_report())

# =============================================================================
# SET YOUR WORKING DIRECTORY - MODIFY IF NEEDED
# =============================================================================
working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Poultryaudit/Dataset"
setwd(working_dir)

# File paths for phenotypic data
pheno_file_3lines <- "Additional_Poultry dataset_audit.xlsx"
pheno_file_wil <- "Dataset_evaluation_corrected.xlsx"

# Base directory for WOMBAT results
wombat_base <- "Analysis_with_wombat2"
model_dir <- "Model1_Animal_YearMonth"

# =============================================================================
# LOAD PHENOTYPIC DATA FOR ALL 4 LINES
# =============================================================================

clean_names <- function(df) {
  colnames(df) <- gsub(" ", "_", colnames(df))
  return(df)
}

convert_types <- function(df) {
  df$Generation <- as.character(df$Generation)
  if ("Batch" %in% colnames(df)) df$Batch <- as.character(df$Batch)
  df$ID <- as.character(df$ID)
  if ("Sire_ID" %in% colnames(df)) df$Sire_ID <- as.character(df$Sire_ID)
  if ("Dam_ID" %in% colnames(df)) df$Dam_ID <- as.character(df$Dam_ID)
  return(df)
}

# Load LIL, BIL, NHIL from one file
lil_data <- read_excel(pheno_file_3lines, sheet = "LIL") %>% clean_names() %>% convert_types()
bil_data <- read_excel(pheno_file_3lines, sheet = "BIL") %>% clean_names() %>% convert_types()
nhil_data <- read_excel(pheno_file_3lines, sheet = "NHIL") %>% clean_names() %>% convert_types()

# Load WIL from separate file
wil_data <- read_excel(pheno_file_wil) %>% clean_names() %>% convert_types()

# Add Line identifier
lil_data$Line <- "LIL-20"
bil_data$Line <- "BIL-20"
nhil_data$Line <- "NHIL-20"
wil_data$Line <- "WIL-20"

# =============================================================================
# EXTRACT GENETIC PARAMETERS FROM WOMBAT OUTPUT
# =============================================================================

lines <- c("LIL", "BIL", "NHIL", "WIL")
all_vc <- list()
all_ebv <- list()

extract_wombat <- function(line_code) {
  sum_est_file <- file.path(wombat_base, line_code, model_dir, "SumEstimates.out")
  ebv_file <- file.path(wombat_base, line_code, model_dir, "RnSoln_animal.dat")
  id_map_file <- file.path(wombat_base, line_code, "ID_mapping.csv")
  
  result <- list(vc = NULL, ebv = NULL)
  
  if (!file.exists(sum_est_file)) {
    cat("  Warning:", line_code, "- SumEstimates.out not found\n")
    return(result)
  }
  
  # ----- EXTRACT VARIANCE COMPONENTS -----
  sum_lines <- readLines(sum_est_file)
  
  covs_a_idx <- grep("COVS A 1 1.*vrat", sum_lines)
  
  if (length(covs_a_idx) > 0) {
    covs_a_line <- sum_lines[covs_a_idx[1]]
    nums <- as.numeric(unlist(regmatches(covs_a_line, gregexpr("[0-9]+\\.?[0-9]*", covs_a_line))))
    
    if (length(nums) >= 7) {
      Va <- nums[4]
      Va_SE <- nums[5]
      h2 <- nums[6]
      h2_SE <- nums[7]
    }
  }
  
  covs_z_idx <- grep("COVS Z 1 1.*vrat", sum_lines)
  if (length(covs_z_idx) > 0) {
    covs_z_line <- sum_lines[covs_z_idx[1]]
    nums_z <- as.numeric(unlist(regmatches(covs_z_line, gregexpr("[0-9]+\\.?[0-9]*", covs_z_line))))
    if (length(nums_z) >= 5) {
      Ve <- nums_z[4]
      Ve_SE <- nums_z[5]
    }
  }
  
  covs_t_idx <- grep("COVS T 1 1", sum_lines)
  if (length(covs_t_idx) > 0) {
    covs_t_line <- sum_lines[covs_t_idx[1]]
    nums_t <- as.numeric(unlist(regmatches(covs_t_line, gregexpr("[0-9]+\\.?[0-9]*", covs_t_line))))
    if (length(nums_t) >= 5) {
      Vp <- nums_t[4]
    }
  }
  
  rec_idx <- grep("No. of records", sum_lines)
  if (length(rec_idx) > 0) {
    rec_line <- sum_lines[rec_idx[1]]
    nums_rec <- as.numeric(unlist(regmatches(rec_line, gregexpr("[0-9]+", rec_line))))
    N_rec <- nums_rec[1]
  }
  
  if (exists("h2")) {
    result$vc <- data.frame(
      Line = paste0(line_code, "-20"),
      N = ifelse(exists("N_rec"), N_rec, NA),
      Va = ifelse(exists("Va"), round(Va, 1), NA),
      Va_SE = ifelse(exists("Va_SE"), round(Va_SE, 1), NA),
      Ve = ifelse(exists("Ve"), round(Ve, 1), NA),
      Ve_SE = ifelse(exists("Ve_SE"), round(Ve_SE, 1), NA),
      Vp = ifelse(exists("Vp"), round(Vp, 1), NA),
      h2 = round(h2, 3),
      h2_SE = round(h2_SE, 3)
    )
  }
  
  # ----- EXTRACT EBVs -----
  if (file.exists(ebv_file) && file.exists(id_map_file)) {
    ebv_lines <- readLines(ebv_file)
    header_idx <- grep("Run_No", ebv_lines)
    
    if (length(header_idx) > 0) {
      data_start <- header_idx[1] + 1
      data_lines <- ebv_lines[data_start:length(ebv_lines)]
      data_lines <- data_lines[nchar(trimws(data_lines)) > 0]
      
      ebv_data <- read.table(text = data_lines, header = FALSE, fill = TRUE)
      
      if (ncol(ebv_data) >= 7) {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "SE_EBV", "Accuracy", "Inbreeding_pct")
      } else {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "Inbreeding_pct")[1:ncol(ebv_data)]
      }
      
      id_map <- read.csv(id_map_file, stringsAsFactors = FALSE)
      ebv_merged <- merge(ebv_data, id_map, by.x = "ID_num", by.y = "ID_num", all.x = TRUE)
      
      result$ebv <- data.frame(
        Line = paste0(line_code, "-20"),
        ID_original = as.character(ebv_merged$ID_orig),
        ID_wombat = ebv_merged$ID_num,
        EBV = ebv_merged$EBV,
        SE_EBV = if ("SE_EBV" %in% colnames(ebv_merged)) ebv_merged$SE_EBV else NA,
        Accuracy = if ("Accuracy" %in% colnames(ebv_merged)) ebv_merged$Accuracy else NA,
        Inbreeding_pct = if ("Inbreeding_pct" %in% colnames(ebv_merged)) ebv_merged$Inbreeding_pct else NA
      )
    }
  }
  
  return(result)
}

for (ln in lines) {
  res <- extract_wombat(ln)
  if (!is.null(res$vc)) all_vc[[ln]] <- res$vc
  if (!is.null(res$ebv)) all_ebv[[ln]] <- res$ebv
}

vc_summary <- do.call(rbind, all_vc)
ebv_all <- do.call(rbind, all_ebv)

# =============================================================================
# COMPUTE PHENOTYPIC SUMMARY STATISTICS
# =============================================================================

compute_pheno_stats <- function(data, line_name) {
  total <- nrow(data)
  g0 <- sum(data$Generation == "0", na.rm = TRUE)
  g1 <- sum(data$Generation == "1", na.rm = TRUE)
  g2 <- sum(data$Generation == "2", na.rm = TRUE)
  
  deaths <- sum(!is.na(data$Cull_Reason) & data$Cull_Reason != "", na.rm = TRUE)
  survival <- round((1 - deaths / total) * 100, 1)
  
  bw16_n <- sum(!is.na(data$BW_16wk))
  bw16_mean <- round(mean(data$BW_16wk, na.rm = TRUE), 0)
  bw16_sd <- round(sd(data$BW_16wk, na.rm = TRUE), 0)
  bw16_min <- round(min(data$BW_16wk, na.rm = TRUE), 0)
  bw16_max <- round(max(data$BW_16wk, na.rm = TRUE), 0)
  bw16_cv <- round(bw16_sd / bw16_mean * 100, 1)
  
  male_bw <- data %>% filter(Sex == "M") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  female_bw <- data %>% filter(Sex == "F") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  dimorphism <- round((male_bw - female_bw) / female_bw * 100, 1)
  
  afe_col <- colnames(data)[grepl("AFE", colnames(data), ignore.case = TRUE)][1]
  if (!is.na(afe_col)) {
    egg_n <- sum(!is.na(data[[afe_col]]))
  } else {
    egg_n <- 0
  }
  
  n_females <- sum(data$Sex == "F", na.rm = TRUE)
  egg_pct <- ifelse(n_females > 0, round(egg_n / n_females * 100, 1), 0)
  
  data.frame(
    Line = line_name,
    Total = total,
    G0 = g0,
    G1 = g1,
    G2 = g2,
    Survival_Pct = survival,
    BW16_N = bw16_n,
    BW16_Mean = bw16_mean,
    BW16_SD = bw16_sd,
    BW16_Min = bw16_min,
    BW16_Max = bw16_max,
    BW16_CV = bw16_cv,
    Male_BW16 = male_bw,
    Female_BW16 = female_bw,
    Dimorphism_Pct = dimorphism,
    Egg_N = egg_n,
    Egg_Pct_Females = egg_pct
  )
}

pheno_stats <- bind_rows(
  compute_pheno_stats(lil_data, "LIL-20"),
  compute_pheno_stats(bil_data, "BIL-20"),
  compute_pheno_stats(nhil_data, "NHIL-20"),
  compute_pheno_stats(wil_data, "WIL-20")
)

ebv_stats <- ebv_all %>%
  group_by(Line) %>%
  summarise(
    N = n(),
    Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
    SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
    Min_EBV = round(min(EBV, na.rm = TRUE), 1),
    Max_EBV = round(max(EBV, na.rm = TRUE), 1),
    Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
    N_Inbred = sum(Inbreeding_pct > 0, na.rm = TRUE),
    .groups = "drop"
  )

total_birds <- sum(pheno_stats$Total)

1 Executive Summary

This report presents descriptive statistics and genetic analysis results for four ILRI poultry lines: LIL-20, BIL-20, NHIL-20, and WIL-20. The evaluation encompasses phenotypic characterization across three generations (G0, G1, G2) and genetic parameter estimation using WOMBAT software for body weight at 16 weeks.

3,790

Total Birds

All 4 Lines Combined

1215

LIL-20

797

BIL-20

1116

NHIL-20

662

WIL-20

📋 Internal Audit Purpose

This genetic evaluation was initiated for internal audit purposes to assess the current status of ILRI’s poultry breeding program. The analysis establishes baseline genetic parameters and identifies areas for improvement. As the program matures and more data becomes available, this evaluation process will be updated in real time to track genetic progress and inform breeding decisions.

Current Scope:

Single trait analysis: Body weight at 16 weeks (BW_16wk)
Three generations evaluated: G0 (foundation), G1, and G2
Four breeding lines: LIL-20, BIL-20, NHIL-20, and WIL-20

📅 Recommended Evaluation Schedule

To ensure continuous genetic progress and timely selection decisions, this genetic evaluation should be conducted twice per year (biannually). Regular evaluations enable:

Tracking of genetic trends over time
Identification of top breeding candidates before mating decisions
Early detection of any issues in the breeding structure
Updated EBV rankings for informed culling and selection

2 Phenotypic Summary

2.1 Population Structure

pheno_stats %>%
  select(Line, Total, G0, G1, G2, Survival_Pct) %>%
  bind_rows(
    data.frame(
      Line = "TOTAL",
      Total = sum(pheno_stats$Total),
      G0 = sum(pheno_stats$G0),
      G1 = sum(pheno_stats$G1),
      G2 = sum(pheno_stats$G2),
      Survival_Pct = round(mean(pheno_stats$Survival_Pct), 1)
    )
  ) %>%
  kable(col.names = c("Line", "Total Birds", "G0", "G1", "G2", "Survival %"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white") %>%
  row_spec(5, bold = TRUE, background = "#f7fafc")

Line	Total Birds	G0	G1	G2	Survival %
LIL-20	1215	612	157	446	74.7
BIL-20	797	300	155	342	92.8
NHIL-20	1116	721	151	244	62.5
WIL-20	662	264	92	306	94.9
TOTAL	3790	1897	555	1338	81.2

pheno_stats %>%
  select(Line, G0, G1, G2) %>%
  pivot_longer(-Line, names_to = "Generation", values_to = "N") %>%
  ggplot(aes(x = Line, y = N, fill = Generation)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = N), position = position_dodge(width = 0.7), vjust = -0.5, size = 3.5) +
  scale_fill_manual(values = c("G0" = "#3182ce", "G1" = "#38a169", "G2" = "#805ad5")) +
  labs(title = "Population Structure by Line and Generation",
       x = "Line", y = "Number of Birds") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)))

2.2 Body Weight at 16 Weeks

pheno_stats %>%
  select(Line, BW16_N, BW16_Mean, BW16_SD, BW16_Min, BW16_Max, BW16_CV) %>%
  kable(col.names = c("Line", "N", "Mean (g)", "SD (g)", "Min (g)", "Max (g)", "CV %"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")

Line	N	Mean (g)	SD (g)	Min (g)	Max (g)	CV %
LIL-20	793	981	224	231	1535	22.8
BIL-20	686	1354	281	392	2254	20.8
NHIL-20	641	1196	409	336	2204	34.2
WIL-20	601	1148	272	359	1875	23.7

all_pheno <- bind_rows(
  lil_data %>% select(Line, BW_16wk),
  bil_data %>% select(Line, BW_16wk),
  nhil_data %>% select(Line, BW_16wk),
  wil_data %>% select(Line, BW_16wk)
)

ggplot(all_pheno %>% filter(!is.na(BW_16wk)), aes(x = Line, y = BW_16wk, fill = Line)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
  labs(title = "Body Weight at 16 Weeks - Comparison Across Lines",
       x = "Line", y = "Body Weight (g)") +
  theme(legend.position = "none")

2.3 Phenotypic Trend

pheno_trend <- bind_rows(
  lil_data %>% select(Line, Generation, BW_16wk),
  bil_data %>% select(Line, Generation, BW_16wk),
  nhil_data %>% select(Line, Generation, BW_16wk),
  wil_data %>% select(Line, Generation, BW_16wk)
) %>%
  filter(!is.na(BW_16wk)) %>%
  group_by(Line, Generation) %>%
  summarise(
    N = n(),
    Mean_BW16 = mean(BW_16wk, na.rm = TRUE),
    SE_BW16 = sd(BW_16wk, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation))

pheno_trend_wide <- pheno_trend %>%
  select(Line, Generation, N, Mean_BW16) %>%
  mutate(Mean_BW16 = round(Mean_BW16, 0)) %>%
  pivot_wider(names_from = Generation, values_from = c(N, Mean_BW16), names_sep = "_")

pheno_trend_wide %>%
  kable(align = c("l", rep("r", ncol(pheno_trend_wide) - 1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")

Line	N_G0	N_G1	N_G2	Mean_BW16_G0	Mean_BW16_G1	Mean_BW16_G2
BIL-20	269	124	293	1422	1346	1294
LIL-20	295	135	363	933	949	1031
NHIL-20	334	91	216	1062	1171	1415
WIL-20	237	87	277	1085	1112	1214

ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Mean_BW16 - SE_BW16, ymax = Mean_BW16 + SE_BW16), 
                width = 0.1, linewidth = 0.8) +
  geom_text(aes(label = round(Mean_BW16, 0)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
  scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                 "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
  labs(title = "Phenotypic Trend: Mean Body Weight at 16 Weeks by Generation",
       subtitle = "Error bars represent ± 1 standard error",
       x = "Generation", y = "Mean Body Weight (g)", color = "Line") +
  theme(legend.position = "bottom") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))

📈 Phenotypic Trend Interpretation

The phenotypic trend shows the change in mean body weight at 16 weeks across generations. An upward trend suggests phenotypic improvement, though this includes both genetic and environmental effects. Note that G0 represents foundation stock, while G1 and G2 are subsequent breeding generations.

2.4 Sexual Dimorphism

pheno_stats %>%
  select(Line, Male_BW16, Female_BW16, Dimorphism_Pct) %>%
  kable(col.names = c("Line", "Male BW_16wk (g)", "Female BW_16wk (g)", "Dimorphism (%)"),
        align = c("l", rep("r", 3))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")

Line	Male BW_16wk (g)	Female BW_16wk (g)	Dimorphism (%)
LIL-20	1125	866	29.9
BIL-20	1494	1200	24.5
NHIL-20	1427	995	43.4
WIL-20	1282	996	28.7

3 Genetic Analysis

3.1 Variance Components and Heritability

ℹ️ Analysis Method

Genetic parameters were estimated using WOMBAT software with an animal model. The model included the following fixed effects:

Sex (male/female)
Batch (hatching batch)
Year (year of hatch)
Month (month of hatch)

The trait analyzed is body weight at 16 weeks (BW_16wk).

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  vc_summary %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "h²", "SE(h²)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white") %>%
    row_spec(which(vc_summary$h2 > 0.6), bold = TRUE, background = "#fed7d7")
} else {
  cat("WOMBAT results not available. Please ensure Analysis_with_wombat2 folder contains completed analyses.\n")
}

	Line	N	Va	SE(Va)	Ve	SE(Ve)	Vp	h²	SE(h²)
LIL	LIL-20	787	19579.0	4163.9	14212.6	2787.6	33791.6	0.579	0.097
BIL	BIL-20	685	12995.1	5250.1	30647.0	4169.6	43642.2	0.298	0.110
NHIL	NHIL-20	638	89703.6	0.0	28389.8	4796.9	118093.0	0.760	0.031
WIL	WIL-20	599	15082.8	5071.0	24419.8	3824.7	39502.6	0.382	0.112

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  ggplot(vc_summary, aes(x = Line, y = h2, fill = Line)) +
    geom_bar(stat = "identity", width = 0.6) +
    geom_errorbar(aes(ymin = h2 - h2_SE, ymax = h2 + h2_SE), width = 0.2) +
    geom_text(aes(label = h2), vjust = -0.8, size = 5, fontface = "bold") +
    geom_hline(yintercept = 0.45, linetype = "dashed", color = "orange", linewidth = 1) +
    annotate("text", x = 0.6, y = 0.47, label = "Typical upper range (h² = 0.45)", hjust = 0, color = "orange", size = 3.5) +
    scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                  "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Heritability Estimates for Body Weight at 16 Weeks",
         subtitle = "Error bars show ± 1 SE | Dashed line indicates typical upper range",
         x = "Line", y = "Heritability (h²)") +
    theme(legend.position = "none") +
    scale_y_continuous(limits = c(0, max(vc_summary$h2 + vc_summary$h2_SE, na.rm = TRUE) * 1.2),
                       expand = expansion(mult = c(0, 0.05)))
}

3.2 EBV Summary Statistics

if (!is.null(ebv_stats) && nrow(ebv_stats) > 0) {
  ebv_stats %>%
    kable(col.names = c("Line", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)", "Mean F%", "N Inbred"),
          align = c("l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
} else {
  cat("EBV data not available.\n")
}

Line	N	Mean EBV (g)	SD (g)	Min (g)	Max (g)	Mean F%	N Inbred
BIL-20	685	35.7	67.7	-217.2	207.8	1.37	75
LIL-20	798	71.1	111.9	-307.5	315.8	0.64	26
NHIL-20	641	175.6	298.0	-668.4	842.7	1.33	59
WIL-20	600	57.4	95.8	-243.0	293.7	1.17	46

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ggplot(ebv_all, aes(x = EBV, fill = Line)) +
    geom_histogram(bins = 30, alpha = 0.8, color = "white") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
    facet_wrap(~Line, scales = "free_y") +
    scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                  "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Red dashed line indicates EBV = 0",
         x = "EBV (g)", y = "Count") +
    theme(legend.position = "none")
}

3.3 Genetic Trend

all_pheno_with_id <- bind_rows(
  lil_data %>% select(Line, ID, Generation, BW_16wk),
  bil_data %>% select(Line, ID, Generation, BW_16wk),
  nhil_data %>% select(Line, ID, Generation, BW_16wk),
  wil_data %>% select(Line, ID, Generation, BW_16wk)
)

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_with_gen <- ebv_all %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(all_pheno_with_id %>% select(Line, ID, Generation), 
              by = c("Line" = "Line", "ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  genetic_trend <- ebv_with_gen %>%
    group_by(Line, Generation) %>%
    summarise(
      N = n(),
      Mean_EBV = mean(EBV, na.rm = TRUE),
      SE_EBV = sd(EBV, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    ) %>%
    mutate(Generation = paste0("G", Generation))
}

if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  n_wide <- genetic_trend %>%
    select(Line, Generation, N) %>%
    pivot_wider(names_from = Generation, values_from = N, names_prefix = "N_")
  
  ebv_wide <- genetic_trend %>%
    select(Line, Generation, Mean_EBV) %>%
    mutate(Mean_EBV = round(Mean_EBV, 1)) %>%
    pivot_wider(names_from = Generation, values_from = Mean_EBV, names_prefix = "Mean_EBV_")
  
  genetic_trend_wide <- n_wide %>%
    left_join(ebv_wide, by = "Line")
  
  gen_cols <- sort(unique(genetic_trend$Generation))
  col_order <- c("Line")
  for (g in gen_cols) {
    col_order <- c(col_order, paste0("N_", g), paste0("Mean_EBV_", g))
  }
  col_order <- col_order[col_order %in% colnames(genetic_trend_wide)]
  genetic_trend_wide <- genetic_trend_wide %>% select(all_of(col_order))
  
  genetic_trend_wide %>%
    kable(align = c("l", rep("r", ncol(genetic_trend_wide) - 1))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}

Line	N_G0	Mean_EBV_G0	N_G1	Mean_EBV_G1	N_G2	Mean_EBV_G2
BIL-20	269	0.0	124	48.2	292	63.4
LIL-20	291	-2.7	134	68.4	362	131.4
NHIL-20	333	-3.0	93	278.5	215	407.7
WIL-20	237	-0.9	87	62.4	275	105.5

if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) +
    geom_point(size = 4) +
    geom_errorbar(aes(ymin = Mean_EBV - SE_EBV, ymax = Mean_EBV + SE_EBV), 
                  width = 0.1, linewidth = 0.8) +
    geom_text(aes(label = round(Mean_EBV, 1)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Genetic Trend: Mean EBV for Body Weight at 16 Weeks by Generation",
         subtitle = "Error bars represent ± 1 standard error | Dashed line indicates EBV = 0",
         x = "Generation", y = "Mean EBV (g)", color = "Line") +
    theme(legend.position = "bottom") +
    scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)))
}

if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  library(gridExtra)
  
  p1 <- ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Phenotypic Trend",
         x = "Generation", y = "Mean BW_16wk (g)") +
    theme(legend.position = "none")
  
  p2 <- ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                                   "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
    labs(title = "Genetic Trend",
         x = "Generation", y = "Mean EBV (g)") +
    theme(legend.position = "bottom")
  
  grid.arrange(p1, p2, ncol = 2, 
               top = grid::textGrob("Phenotypic vs Genetic Trend Comparison", 
                                    gp = grid::gpar(fontsize = 14, fontface = "bold")))
}

📊 Genetic Trend Interpretation

The genetic trend shows the change in mean Estimated Breeding Value (EBV) across generations, representing the genetic progress achieved through selection.

Key points:

Positive trend (upward slope): Indicates genetic improvement in body weight at 16 weeks
G0 as baseline: Foundation stock typically centers around EBV = 0
G1 and G2 progress: Positive mean EBVs in later generations indicate selection has been effective
Difference from phenotypic trend: Genetic trend isolates the genetic component of change, removing environmental effects

3.4 Top Breeding Animals - Selection Candidates

🏆 Elite Breeding Stock Identification

The table below identifies the top 10 animals per line based on their Estimated Breeding Values (EBVs) for body weight at 16 weeks. These individuals represent the genetic elite of each line and are prime candidates for retention as breeding sires and dams for the next generation.

Selection Question: Which individuals should be kept as breeding animals?

The animals listed below have the highest genetic merit for body weight. Selecting these individuals as parents of the next generation will maximize genetic progress while maintaining genetic diversity.

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_animals <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    mutate(Rank = row_number()) %>%
    ungroup() %>%
    select(Line, Rank, ID_original, EBV, Accuracy, Inbreeding_pct) %>%
    mutate(
      EBV = round(EBV, 1),
      Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2)),
      Inbreeding_pct = ifelse(is.na(Inbreeding_pct), "-", round(Inbreeding_pct, 2))
    )
  
  top_animals %>%
    kable(col.names = c("Line", "Rank", "Animal ID", "EBV (g)", "Accuracy", "Inbreeding %"),
          align = c("l", "c", "l", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(which(top_animals$Rank == 1), bold = TRUE, background = "#c6f6d5")
}

Line	Rank	Animal ID	EBV (g)	Accuracy	Inbreeding %
BIL-20	1	C-7908	207.8	0.71	12.5
BIL-20	2	1222	204.3	0.54	0.0
BIL-20	3	C-7918	192.8	0.65	0.0
BIL-20	4	B-3407	189.9	0.78	0.0
BIL-20	5	C-7943	188.5	0.66	0.0
BIL-20	6	C-3263	184.5	0.66	0.0
BIL-20	7	1426	183.2	0.54	0.0
BIL-20	8	C-7075	178.4	0.71	12.5
BIL-20	9	1287	174.6	0.54	0.0
BIL-20	10	B-3091	169.2	0.68	0.0
LIL-20	1	C-7742	315.8	0.80	0.0
LIL-20	2	C-7562	299.9	0.81	0.0
LIL-20	3	C-7660	297.5	0.81	0.0
LIL-20	4	C-7565	297.0	0.81	0.0
LIL-20	5	C-8084	292.5	0.80	0.0
LIL-20	6	C-8158	285.6	0.81	0.0
LIL-20	7	C-8142	283.4	0.80	0.0
LIL-20	8	C-8147	281.1	0.81	0.0
LIL-20	9	C-7719	277.3	0.80	0.0
LIL-20	10	B-3323	276.3	0.91	0.0
NHIL-20	1	C-7171	842.7	0.90	25.0
NHIL-20	2	C-7136	842.5	0.89	12.5
NHIL-20	3	C-7178	805.5	0.87	0.0
NHIL-20	4	C-7180	784.1	0.87	0.0
NHIL-20	5	C-7981	774.7	0.89	12.5
NHIL-20	6	C-7137	756.8	0.89	12.5
NHIL-20	7	C-7128	727.8	0.87	0.0
NHIL-20	8	C-7181	724.8	0.87	0.0
NHIL-20	9	2384	717.4	0.87	0.0
NHIL-20	10	C-7966	693.7	0.87	0.0
WIL-20	1	C-7498	293.7	0.71	0.0
WIL-20	2	B-3346	288.2	0.78	0.0
WIL-20	3	C-8280	287.2	0.72	0.0
WIL-20	4	C-7497	277.2	0.71	0.0
WIL-20	5	C-7435	276.1	0.72	0.0
WIL-20	6	B-3236	269.1	0.86	0.0
WIL-20	7	C-3153	265.1	0.71	0.0
WIL-20	8	C-7510	263.7	0.72	0.0
WIL-20	9	B-3368	262.6	0.78	0.0
WIL-20	10	C-8283	261.2	0.72	0.0

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_summary <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    summarise(
      Top10_Mean_EBV = round(mean(EBV), 1),
      Top10_Min_EBV = round(min(EBV), 1),
      Top10_Max_EBV = round(max(EBV), 1),
      Selection_Differential = round(mean(EBV) - 0, 1),
      .groups = "drop"
    )
  
  cat("\n")
  top_summary %>%
    kable(col.names = c("Line", "Mean EBV (Top 10)", "Min EBV", "Max EBV", "Selection Differential (g)"),
          align = c("l", rep("r", 4))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
}

Line	Mean EBV (Top 10)	Min EBV	Max EBV	Selection Differential (g)
BIL-20	187.3	169.2	207.8	187.3
LIL-20	290.6	276.3	315.8	290.6
NHIL-20	767.0	693.7	842.7	767.0
WIL-20	274.4	261.2	293.7	274.4

4 Egg Production Traits

🚫 Egg Traits Not Analyzed Genetically

Genetic analysis of egg production traits (age at first egg, egg number at 45 weeks, egg weight) was not conducted due to insufficient data for reliable variance component estimation.

4.1 Data Availability

pheno_stats %>%
  select(Line, Egg_N, Egg_Pct_Females) %>%
  mutate(Status = ifelse(Egg_N < 100, "Insufficient", "Available")) %>%
  kable(col.names = c("Line", "Egg Records (N)", "% of Females", "Status"),
        align = c("l", rep("r", 2), "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#e53e3e", color = "white")

Line	Egg Records (N)	% of Females	Status
LIL-20	46	9.7	Insufficient
BIL-20	43	12.3	Insufficient
NHIL-20	31	8.6	Insufficient
WIL-20	28	9.7	Insufficient

4.2 Reasons for Excluding Egg Traits

Sample size limitations: With fewer than 100 records per line in most cases, variance component estimation would produce unreliable and imprecise heritability estimates.
Pedigree connectivity: Limited number of dam families with egg records reduces the ability to separate genetic from environmental effects.
Generational coverage: Egg data is primarily available from G1 females, with G2 recording still in progress.
Standard error concerns: Expected standard errors would exceed acceptable thresholds for publication-quality estimates.

5 Limitations

⚠️ Important Limitations to Consider

The following limitations should be considered when interpreting the results of this evaluation:

Small sample sizes: Population sizes range from 662 to 1215 animals per line, which limits statistical power and may affect variance component estimates.
Single trait analysis: The current evaluation focuses only on body weight at 16 weeks. Other economically important traits (egg production, survival, feed efficiency) require additional data collection before they can be included in a comprehensive selection index.
Limited pedigree depth: With only three generations (G0, G1, G2), the pedigree structure provides limited information for genetic evaluation. G0 founders have unknown parents by design.
Limited relatedness: The number of unique sires and dams is relatively small, creating unbalanced family structures and potentially affecting heritability estimates.
No genomic data: Pedigree-based relationships may not fully capture actual genetic relationships, and genomic selection is not yet possible.

6 Conclusions and Future Directions

6.1 Key Findings

✅ Summary of Results



Measure
Finding


Total birds evaluated
3,790 across 4 lines

Generations included
G0, G1, and G2

Survival rates
All lines > 92%

Mean BW at 16 weeks
981g to 1354g

Heritability range
0.298 to 0.76

Genetic trend
Positive for most lines from G0 to G2

Measure	Finding
Total birds evaluated	3,790 across 4 lines
Generations included	G0, G1, and G2
Survival rates	All lines > 92%
Mean BW at 16 weeks	981g to 1354g
Heritability range	0.298 to 0.76
Genetic trend	Positive for most lines from G0 to G2

📋 Note on Selection Trait

The current breeding program focuses on a single selection trait: body weight at 16 weeks (BW_16wk). This trait was chosen as the primary selection criterion due to its economic importance and ease of measurement. While egg production traits are being recorded, they are not yet incorporated into the breeding objective due to insufficient data for reliable genetic evaluation.

6.2 Recommended Actions

📋 Priority Actions

Continue data collection: The current analysis is based on relatively small sample sizes with a single trait. More phenotypic records across generations are needed to improve precision of genetic parameter estimates.
Expand trait recording: Systematic recording of additional economically important traits is essential:
- Egg production traits (age at first egg, egg number, egg weight)
- Survival and disease resistance
- Feed efficiency (if feasible)
Establish biannual evaluation schedule: Conduct genetic evaluations twice per year to ensure timely selection decisions and continuous progress monitoring.
Multi-trait selection index: As sufficient data becomes available for multiple traits, develop a comprehensive selection index that balances growth and reproductive traits.
Consider genomic tools: Implement SNP genotyping to improve relationship estimation and enable genomic selection.
Standardize ID systems: Ensure consistent animal identification across data collection, pedigree recording, and genetic evaluation to avoid ID matching issues.

🌍 Extension to Other Project Countries

This analytical framework should be extended to poultry datasets from other project countries to strengthen regional genetic improvement:

Region	Countries	Priority	Expected Impact
East Africa	Tanzania	High	Harmonize evaluation methods, enable germplasm exchange
West Africa	Nigeria	Medium	Regional expansion, G×E assessment
Asia	Vietnam, other project countries	Medium	Cross-regional comparison, diverse environment testing

Benefits of Multi-Country Extension:

Cross-population comparisons: Identify best-performing lines across environments
Genotype-by-environment (G×E) assessment: Understand adaptability of different lines across African and Asian production systems
Larger combined dataset: Improve precision of genetic parameter estimates
Regional breeding networks: Facilitate germplasm exchange and collaborative improvement within and across continents

6.3 Value for Stakeholders

🤝 Potential Future Applications

As this breeding program matures and more comprehensive data becomes available, the genetic evaluation framework can provide value for various stakeholders:

For Internal Program Management:

Evidence-based selection decisions
Tracking of genetic progress over time
Identification of breeding structure issues

For Future Partnerships:

Transparent documentation of genetic improvement
Genetically evaluated breeding stock with EBV records
Foundation for collaboration with other breeding programs

For Capacity Building:

Training opportunities in quantitative genetics methods
Development of local expertise in genetic evaluation
Replicable framework for other species/programs

Poultry Breeding Program Evaluation

Combined Summary Report: LIL-20, BIL-20, NHIL-20, and WIL-20 Lines

Setegn Worku Alemu, PhD | Quantitative Geneticist

December 21, 2025