library(readxl)
library(tidyverse)
library(janitor)
library(knitr)
library(kableExtra)
library(scales)
library(gridExtra)

if (!require(mclust, quietly = TRUE)) {
  install.packages("mclust", quiet = TRUE)
}
library(mclust)

if (!require(readODS, quietly = TRUE)) {
  install.packages("readODS", repos = "https://cloud.r-project.org", quiet = TRUE)
}
library(readODS)

# Custom theme for all lines
theme_report <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#1a365d"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}

# Tilili-specific theme (Teal palette)
theme_tilili <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#234e52"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}
theme_set(theme_report())

line_colors <- c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                 "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936", "Tilili" = "#e53e3e")

# COLORBLIND-FRIENDLY PALETTE for Tilili generations
gen_colors <- c("G3" = "#0077BB", "G4" = "#EE7733", "G5" = "#009988")
batch_colors <- c("0" = "#0077BB", "3" = "#33BBEE", "4" = "#009988", 
                  "5" = "#EE7733", "6" = "#CC3311", "7" = "#EE3377")

working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Poultryaudit/Dataset"
setwd(working_dir)

pheno_file_3lines <- "Additional_Poultry dataset_audit.xlsx"
pheno_file_wil <- "Dataset_evaluation_corrected.xlsx"
tilili_file <- "AllG-345.csv"

wombat_base <- "Analysis_with_wombat2"
model_dir <- "Model1_Animal_YearMonth"

# Tilili-specific paths for egg data
tilili_wombat_dir <- file.path(wombat_base, "Tilili", "Model1_Animal_YearMonth")
egg_weight_file <- file.path(wombat_base, "TililiAEW", "Masterfile_AEW.csv")
merged_egg_file <- file.path(wombat_base, "Tililieggnum", "Merged_egg.csv")
old_egg_file <- file.path(wombat_base, "Tililieggnum", "Masterfile_egg.csv")

# Determine egg number file
if (file.exists(merged_egg_file)) {
  tryCatch({
    test_read <- read_ods(merged_egg_file, sheet = 1, col_names = TRUE)
    if (nrow(test_read) > 0) {
      egg_number_file <- merged_egg_file
    } else {
      egg_number_file <- old_egg_file
    }
  }, error = function(e) {
    tryCatch({
      test_read <- read_csv(merged_egg_file, show_col_types = FALSE)
      if (nrow(test_read) > 0) {
        egg_number_file <- merged_egg_file
      } else {
        egg_number_file <- old_egg_file
      }
    }, error = function(e2) {
      egg_number_file <- old_egg_file
    })
  })
} else {
  egg_number_file <- old_egg_file
}

clean_names_custom <- function(df) {
  colnames(df) <- gsub(" ", "_", colnames(df))
  return(df)
}

convert_types <- function(df) {
  df$Generation <- as.character(df$Generation)
  if ("Batch" %in% colnames(df)) df$Batch <- as.character(df$Batch)
  df$ID <- as.character(df$ID)
  if ("Sire_ID" %in% colnames(df)) df$Sire_ID <- as.character(df$Sire_ID)
  if ("Dam_ID" %in% colnames(df)) df$Dam_ID <- as.character(df$Dam_ID)
  return(df)
}

lil_data <- read_excel(pheno_file_3lines, sheet = "LIL") %>% clean_names_custom() %>% convert_types()
bil_data <- read_excel(pheno_file_3lines, sheet = "BIL") %>% clean_names_custom() %>% convert_types()
nhil_data <- read_excel(pheno_file_3lines, sheet = "NHIL") %>% clean_names_custom() %>% convert_types()
wil_data <- read_excel(pheno_file_wil) %>% clean_names_custom() %>% convert_types()

tilili_raw <- read.csv(tilili_file, stringsAsFactors = FALSE, fileEncoding = "Latin1")
colnames(tilili_raw) <- trimws(colnames(tilili_raw))

tilili_data <- data.frame(
  ID = as.character(tilili_raw$ChickenID),
  Sire_ID = as.character(tilili_raw$SireID),
  Dam_ID = as.character(tilili_raw$DamId),
  Sex = as.character(tilili_raw$Sex),
  Batch = as.character(tilili_raw$Batch),
  Generation = as.character(tilili_raw$Generation),
  BW_16wk = as.numeric(tilili_raw$Week16),
  stringsAsFactors = FALSE
)

tilili_data$Sire_ID[tilili_data$Sire_ID %in% c("", "0", "NA", "na") | is.na(tilili_data$Sire_ID)] <- NA
tilili_data$Dam_ID[tilili_data$Dam_ID %in% c("", "0", "NA", "na") | is.na(tilili_data$Dam_ID)] <- NA

lil_data$Line <- "LIL-20"
bil_data$Line <- "BIL-20"
nhil_data$Line <- "NHIL-20"
wil_data$Line <- "WIL-20"
tilili_data$Line <- "Tilili"

lines <- c("LIL", "BIL", "NHIL", "WIL", "Tilili")
all_vc <- list()
all_ebv <- list()

extract_wombat <- function(line_code) {
  sum_est_file <- file.path(wombat_base, line_code, model_dir, "SumEstimates.out")
  ebv_file <- file.path(wombat_base, line_code, model_dir, "RnSoln_animal.dat")
  id_map_file <- file.path(wombat_base, line_code, "ID_mapping.csv")
  
  result <- list(vc = NULL, ebv = NULL)
  
  if (!file.exists(sum_est_file)) return(result)
  
  line_label <- if (line_code == "Tilili") "Tilili" else paste0(line_code, "-20")
  
  sum_lines <- readLines(sum_est_file)
  Va <- NA; Va_SE <- NA; Ve <- NA; Ve_SE <- NA; Vp <- NA; h2 <- NA; h2_SE <- NA; N_rec <- NA
  
  covs_a_idx <- grep("COVS A 1 1.*vrat", sum_lines)
  if (length(covs_a_idx) > 0) {
    covs_a_line <- sum_lines[covs_a_idx[1]]
    nums <- as.numeric(unlist(regmatches(covs_a_line, gregexpr("[0-9]+\\.?[0-9]*", covs_a_line))))
    if (length(nums) >= 7) { Va <- nums[4]; Va_SE <- nums[5]; h2 <- nums[6]; h2_SE <- nums[7] }
  }
  
  covs_z_idx <- grep("COVS Z 1 1.*vrat", sum_lines)
  if (length(covs_z_idx) > 0) {
    covs_z_line <- sum_lines[covs_z_idx[1]]
    nums_z <- as.numeric(unlist(regmatches(covs_z_line, gregexpr("[0-9]+\\.?[0-9]*", covs_z_line))))
    if (length(nums_z) >= 5) { Ve <- nums_z[4]; Ve_SE <- nums_z[5] }
  }
  
  covs_t_idx <- grep("COVS T 1 1", sum_lines)
  if (length(covs_t_idx) > 0) {
    covs_t_line <- sum_lines[covs_t_idx[1]]
    nums_t <- as.numeric(unlist(regmatches(covs_t_line, gregexpr("[0-9]+\\.?[0-9]*", covs_t_line))))
    if (length(nums_t) >= 5) { Vp <- nums_t[4] }
  }
  
  rec_idx <- grep("No. of records", sum_lines)
  if (length(rec_idx) > 0) {
    rec_line <- sum_lines[rec_idx[1]]
    nums_rec <- as.numeric(unlist(regmatches(rec_line, gregexpr("[0-9]+", rec_line))))
    N_rec <- nums_rec[1]
  }
  
  if (!is.na(h2)) {
    result$vc <- data.frame(
      Line = line_label, N = ifelse(!is.na(N_rec), N_rec, NA),
      Va = ifelse(!is.na(Va), round(Va, 1), NA), Va_SE = ifelse(!is.na(Va_SE), round(Va_SE, 1), NA),
      Ve = ifelse(!is.na(Ve), round(Ve, 1), NA), Ve_SE = ifelse(!is.na(Ve_SE), round(Ve_SE, 1), NA),
      Vp = ifelse(!is.na(Vp), round(Vp, 1), NA), h2 = round(h2, 3), h2_SE = round(h2_SE, 3)
    )
  }
  
  if (file.exists(ebv_file) && file.exists(id_map_file)) {
    ebv_lines <- readLines(ebv_file)
    header_idx <- grep("Run_No", ebv_lines)
    
    if (length(header_idx) > 0) {
      data_start <- header_idx[1] + 1
      data_lines <- ebv_lines[data_start:length(ebv_lines)]
      data_lines <- data_lines[nchar(trimws(data_lines)) > 0]
      
      ebv_data <- read.table(text = data_lines, header = FALSE, fill = TRUE)
      
      if (ncol(ebv_data) >= 7) {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "SE_EBV", "Accuracy", "Inbreeding_pct")
      } else {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "Inbreeding_pct")[1:ncol(ebv_data)]
      }
      
      id_map <- read.csv(id_map_file, stringsAsFactors = FALSE)
      ebv_merged <- merge(ebv_data, id_map, by.x = "ID_num", by.y = "ID_num", all.x = TRUE)
      
      result$ebv <- data.frame(
        Line = line_label, ID_original = as.character(ebv_merged$ID_orig),
        ID_wombat = ebv_merged$ID_num, EBV = ebv_merged$EBV,
        SE_EBV = if ("SE_EBV" %in% colnames(ebv_merged)) ebv_merged$SE_EBV else NA,
        Accuracy = if ("Accuracy" %in% colnames(ebv_merged)) ebv_merged$Accuracy else NA,
        Inbreeding_pct = if ("Inbreeding_pct" %in% colnames(ebv_merged)) ebv_merged$Inbreeding_pct else NA
      )
    }
  }
  
  return(result)
}

for (ln in lines) {
  res <- extract_wombat(ln)
  if (!is.null(res$vc)) all_vc[[ln]] <- res$vc
  if (!is.null(res$ebv)) all_ebv[[ln]] <- res$ebv
}

vc_summary <- do.call(rbind, all_vc)
ebv_all <- do.call(rbind, all_ebv)

compute_pheno_stats <- function(data, line_name) {
  total <- nrow(data)
  g0 <- sum(data$Generation == "0", na.rm = TRUE)
  g1 <- sum(data$Generation == "1", na.rm = TRUE)
  g2 <- sum(data$Generation == "2", na.rm = TRUE)
  g3 <- sum(data$Generation == "3", na.rm = TRUE)
  g4 <- sum(data$Generation == "4", na.rm = TRUE)
  g5 <- sum(data$Generation == "5", na.rm = TRUE)
  
  if ("Cull_Reason" %in% colnames(data)) {
    deaths <- sum(!is.na(data$Cull_Reason) & data$Cull_Reason != "", na.rm = TRUE)
  } else if ("Reason" %in% colnames(data)) {
    deaths <- sum(!is.na(data$Reason) & data$Reason != "", na.rm = TRUE)
  } else { deaths <- 0 }
  survival <- round((1 - deaths / total) * 100, 1)
  
  bw16_n <- sum(!is.na(data$BW_16wk))
  bw16_mean <- round(mean(data$BW_16wk, na.rm = TRUE), 0)
  bw16_sd <- round(sd(data$BW_16wk, na.rm = TRUE), 0)
  bw16_min <- round(min(data$BW_16wk, na.rm = TRUE), 0)
  bw16_max <- round(max(data$BW_16wk, na.rm = TRUE), 0)
  bw16_cv <- round(bw16_sd / bw16_mean * 100, 1)
  
  male_bw <- data %>% filter(Sex == "M") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  female_bw <- data %>% filter(Sex == "F") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  dimorphism <- round((male_bw - female_bw) / female_bw * 100, 1)
  
  afe_col <- colnames(data)[grepl("AFE", colnames(data), ignore.case = TRUE)][1]
  egg_n <- if (!is.na(afe_col)) sum(!is.na(data[[afe_col]])) else 0
  n_females <- sum(data$Sex == "F", na.rm = TRUE)
  egg_pct <- ifelse(n_females > 0, round(egg_n / n_females * 100, 1), 0)
  
  data.frame(
    Line = line_name, Total = total, G0 = g0, G1 = g1, G2 = g2, G3 = g3, G4 = g4, G5 = g5,
    Survival_Pct = survival, BW16_N = bw16_n, BW16_Mean = bw16_mean, BW16_SD = bw16_sd,
    BW16_Min = bw16_min, BW16_Max = bw16_max, BW16_CV = bw16_cv,
    Male_BW16 = male_bw, Female_BW16 = female_bw, Dimorphism_Pct = dimorphism,
    Egg_N = egg_n, Egg_Pct_Females = egg_pct
  )
}

pheno_stats <- bind_rows(
  compute_pheno_stats(lil_data, "LIL-20"),
  compute_pheno_stats(bil_data, "BIL-20"),
  compute_pheno_stats(nhil_data, "NHIL-20"),
  compute_pheno_stats(wil_data, "WIL-20"),
  compute_pheno_stats(tilili_data, "Tilili")
)

ebv_stats <- ebv_all %>%
  group_by(Line) %>%
  summarise(N = n(), Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
            SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
            Min_EBV = round(min(EBV, na.rm = TRUE), 1),
            Max_EBV = round(max(EBV, na.rm = TRUE), 1),
            Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
            N_Inbred = sum(Inbreeding_pct > 0, na.rm = TRUE), .groups = "drop")

total_birds <- sum(pheno_stats$Total)

compute_parents_by_gen <- function(data, line_name) {
  sire_by_gen <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(N_Sires = n_distinct(Sire_ID), .groups = "drop")
  
  dam_by_gen <- data %>%
    filter(!is.na(Dam_ID) & Dam_ID != "" & Dam_ID != "0" & Dam_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(N_Dams = n_distinct(Dam_ID), .groups = "drop")
  
  full_join(sire_by_gen, dam_by_gen, by = "Generation") %>%
    mutate(Line = line_name, Generation = paste0("G", Generation)) %>%
    select(Line, Generation, N_Sires, N_Dams)
}

# Enhanced inbreeding calculation function with robust metrics
compute_inbreeding_detailed <- function(ebv_data, pheno_data, line_name) {
  ebv_with_gen <- ebv_data %>%
    filter(Line == line_name) %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(pheno_data %>% select(ID, Generation) %>% mutate(ID = as.character(ID)),
              by = c("ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  if (nrow(ebv_with_gen) == 0) return(NULL)
  
  ebv_with_gen %>%
    group_by(Generation) %>%
    summarise(
      N_Total = n(), 
      N_F_gt_0 = sum(Inbreeding_pct > 0, na.rm = TRUE),
      N_F_ge_6.25 = sum(Inbreeding_pct >= 6.25, na.rm = TRUE),
      N_F_ge_12.5 = sum(Inbreeding_pct >= 12.5, na.rm = TRUE),
      N_F_ge_25 = sum(Inbreeding_pct >= 25, na.rm = TRUE),
      Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
      Median_F = round(median(Inbreeding_pct, na.rm = TRUE), 2),
      Max_F = round(max(Inbreeding_pct, na.rm = TRUE), 2),
      SD_F = round(sd(Inbreeding_pct, na.rm = TRUE), 2),
      .groups = "drop"
    ) %>%
    mutate(
      Line = line_name, 
      Generation = paste0("G", Generation),
      Pct_F_gt_0 = round(N_F_gt_0 / N_Total * 100, 1),
      Pct_F_ge_6.25 = round(N_F_ge_6.25 / N_Total * 100, 1),
      Pct_F_ge_12.5 = round(N_F_ge_12.5 / N_Total * 100, 1),
      Pct_F_ge_25 = round(N_F_ge_25 / N_Total * 100, 1),
      # Delta F calculation (rate of inbreeding)
      Delta_F = ifelse(Mean_F > 0, round(Mean_F / 100, 4), NA)
    ) %>%
    select(Line, Generation, N_Total, N_F_gt_0, Pct_F_gt_0, N_F_ge_6.25, Pct_F_ge_6.25,
           N_F_ge_12.5, Pct_F_ge_12.5, N_F_ge_25, Pct_F_ge_25, Mean_F, Median_F, SD_F, Max_F, Delta_F)
}

# Compute mating ratio from pedigree data
compute_mating_ratio <- function(data, line_name) {
  mating_info <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA" &
           !is.na(Dam_ID) & Dam_ID != "" & Dam_ID != "0" & Dam_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(
      N_Offspring = n(),
      N_Sires = n_distinct(Sire_ID),
      N_Dams = n_distinct(Dam_ID),
      .groups = "drop"
    ) %>%
    mutate(
      Line = line_name,
      Generation = paste0("G", Generation),
      Mating_Ratio = round(N_Dams / N_Sires, 1),
      Mating_Ratio_Label = paste0("1:", round(N_Dams / N_Sires, 0))
    ) %>%
    select(Line, Generation, N_Sires, N_Dams, N_Offspring, Mating_Ratio, Mating_Ratio_Label)
  
  return(mating_info)
}

compute_sire_ebv <- function(data, ebv_data, line_name) {
  sires <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    pull(Sire_ID) %>% unique()
  
  sire_ebvs <- ebv_data %>% filter(Line == line_name, ID_original %in% sires)
  
  if (nrow(sire_ebvs) > 0) {
    data.frame(Line = line_name, N_Sires = length(sires), N_Sires_With_EBV = nrow(sire_ebvs),
      Mean_Sire_EBV = round(mean(sire_ebvs$EBV, na.rm = TRUE), 1),
      SD_Sire_EBV = round(sd(sire_ebvs$EBV, na.rm = TRUE), 1),
      Min_Sire_EBV = round(min(sire_ebvs$EBV, na.rm = TRUE), 1),
      Max_Sire_EBV = round(max(sire_ebvs$EBV, na.rm = TRUE), 1))
  } else {
    data.frame(Line = line_name, N_Sires = length(sires), N_Sires_With_EBV = 0,
               Mean_Sire_EBV = NA, SD_Sire_EBV = NA, Min_Sire_EBV = NA, Max_Sire_EBV = NA)
  }
}

compute_sire_ebv_by_gen <- function(data, ebv_data, line_name) {
  offspring_data <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    select(ID, Sire_ID, Generation) %>%
    mutate(ID = as.character(ID), Sire_ID = as.character(Sire_ID))
  
  if (nrow(offspring_data) == 0) return(NULL)
  
  sire_by_gen <- offspring_data %>%
    group_by(Generation) %>%
    summarise(Sires = list(unique(Sire_ID)), N_Offspring = n(), .groups = "drop")
  
  sire_ebvs <- ebv_data %>%
    filter(Line == line_name) %>%
    select(ID_original, EBV) %>%
    mutate(ID_original = as.character(ID_original))
  
  result <- lapply(1:nrow(sire_by_gen), function(i) {
    gen <- sire_by_gen$Generation[i]
    sires_in_gen <- unlist(sire_by_gen$Sires[i])
    n_offspring <- sire_by_gen$N_Offspring[i]
    
    sire_ebv_gen <- sire_ebvs %>% filter(ID_original %in% sires_in_gen)
    
    if (nrow(sire_ebv_gen) > 0) {
      data.frame(Line = line_name, Generation = paste0("G", gen), N_Sires = length(sires_in_gen),
        N_Sires_With_EBV = nrow(sire_ebv_gen), N_Offspring = n_offspring,
        Mean_Sire_EBV = round(mean(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        SD_Sire_EBV = round(sd(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        Min_Sire_EBV = round(min(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        Max_Sire_EBV = round(max(sire_ebv_gen$EBV, na.rm = TRUE), 1))
    } else {
      data.frame(Line = line_name, Generation = paste0("G", gen), N_Sires = length(sires_in_gen),
        N_Sires_With_EBV = 0, N_Offspring = n_offspring,
        Mean_Sire_EBV = NA, SD_Sire_EBV = NA, Min_Sire_EBV = NA, Max_Sire_EBV = NA)
    }
  })
  
  do.call(rbind, result)
}

1 Executive Summary

This comprehensive report presents descriptive statistics and genetic analysis results for five ILRI poultry lines: LIL-20, BIL-20, NHIL-20, WIL-20, and Tilili (local Ethiopian breed). The evaluation encompasses phenotypic characterization across multiple generations and genetic parameter estimation using WOMBAT software for body weight at 16 weeks, including in-depth Tilili analysis with egg production traits.

9,696

Total Birds

All 5 Lines

1215

LIL-20

797

BIL-20

1116

NHIL-20

662

WIL-20

5906

Tilili

Local Breed

📋 Internal Audit Purpose

This genetic evaluation was initiated for internal audit purposes to assess the current status of ILRI’s poultry breeding program. The analysis establishes baseline genetic parameters and identifies areas for improvement.

Report Sections:

Breed Management Context - Housing systems and environmental factors
Phenotypic Summary - Population structure and body weight analysis
Mating Ratio Analysis - Breeding structure assessment (1:4 for ILRI lines, 1:10 for Tilili)
Enhanced Inbreeding Analysis - Comprehensive F-coefficient evaluation
Genetic Analysis - Variance components, heritability, and EBVs
NHIL-20 Detailed Analysis - Population substructure investigation
Tilili In-Depth Analysis - Comprehensive egg production traits

2 Breed Management Context

🌿 Housing System & Environmental Factors

Understanding the production environment is critical when comparing Estimated Breeding Values (EBVs) across lines. The housing systems differ fundamentally between the Tilili line and all other ILRI lines.

2.0.1 Tilili Line Semi-Scavenging System

The Tilili ecotype is raised under a two-phase semi-scavenging system designed to simulate smallholder farming conditions:

Phase 1 (Weeks 0-9): Controlled/conventional housing for early growth and health monitoring
Phase 2 (Weeks 9-16): Free-range conditions in outdoor paddocks to assess adaptability
Restricted feeding (80%): Only 80% of standard feed ration to encourage natural foraging behavior
Mating ratio: 1:10 (1 male to 10 females) for natural mating
Semi-scavenging system: Supplemental feeding plus natural food sources (insects, seeds, vegetation)

2.0.2 ILRI Lines Conventional Housing

All other breeds (LIL-20, BIL-20, NHIL-20, WIL-20) are raised in conventional housing systems:

Indoor housing: Climate-controlled environment
Controlled feeding: Standardized feed rations with measured intake
Mating ratio: 1:4 (1 male to 4 females) as standard breeding practice
Biosecurity: Lower disease exposure through controlled entry

📋 Line-by-Line Housing and Mating Summary

Line	Housing System	Mating Ratio	Environment Type
LIL-20	Conventional Housing	1:4	Conventional
BIL-20	Conventional Housing	1:4	Conventional
NHIL-20	Conventional Housing	1:4	Conventional
WIL-20	Conventional Housing	1:4	Conventional
Tilili	Semi-Scavenging (Wk 0-9 Controlled, Wk 9-16 Free-Range)	1:10	Semi-Scavenging

3 Mating Ratio Analysis

🐓 Mating Ratios by Breeding Line

The breeding program uses different mating ratios based on line characteristics and management systems:

Line Type	Mating Ratio	Rationale
ILRI Lines (LIL-20, BIL-20, NHIL-20, WIL-20)	1:4	Controlled pens, higher selection intensity
Tilili (Local Ethiopian)	1:10	Natural mating, simulates smallholder conditions

Key considerations:

Genetic diversity: Using multiple sires prevents excessive inbreeding
Selection intensity: Lower ratio (1:4) allows stronger male selection; higher ratio (1:10) reduces male replacement needs
Effective population size (Ne): Higher Ne reduces genetic drift
Practical management: 1:10 ratio typical for semi-scavenging systems in smallholder farms

Formula for Effective Population Size:

Ne = (4 × Nm × Nf) / (Nm + Nf)

Where: Nm = number of males, Nf = number of females

With a 1:4 ratio: If Nm = 10, Nf = 40, then Ne = (4 × 10 × 40) / (10 + 40) = 32
With a 1:10 ratio: If Nm = 5, Nf = 50, then Ne = (4 × 5 × 50) / (5 + 50) = 18.2

3.1 Observed Mating Ratios by Line and Generation

mating_all <- bind_rows(
  compute_mating_ratio(lil_data, "LIL-20"),
  compute_mating_ratio(bil_data, "BIL-20"),
  compute_mating_ratio(nhil_data, "NHIL-20"),
  compute_mating_ratio(wil_data, "WIL-20"),
  compute_mating_ratio(tilili_data, "Tilili")
)

if (nrow(mating_all) > 0) {
  mating_all %>%
    kable(col.names = c("Line", "Generation", "N Sires", "N Dams", "N Offspring", "Ratio", "Mating Ratio"),
          align = c("l", "l", rep("r", 5))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#3182ce", color = "white")
}

Line	Generation	N Sires	N Dams	N Offspring	Ratio	Mating Ratio
LIL-20	G1	11	22	157	2.0	1:2
LIL-20	G2	12	43	446	3.6	1:4
BIL-20	G1	6	27	155	4.5	1:4
BIL-20	G2	8	39	342	4.9	1:5
NHIL-20	G1	5	24	151	4.8	1:5
NHIL-20	G2	6	27	244	4.5	1:4
WIL-20	G1	7	24	92	3.4	1:3
WIL-20	G2	9	23	306	2.6	1:3
Tilili	G3	48	212	722	4.4	1:4
Tilili	G4	44	289	4112	6.6	1:7
Tilili	G5	16	87	1072	5.4	1:5

if (nrow(mating_all) > 0) {
  # Add target lines: 4 for ILRI lines, 10 for Tilili
  ggplot(mating_all, aes(x = Generation, y = Mating_Ratio, fill = Line)) +
    geom_bar(stat = "identity", position = "dodge", width = 0.7) +
    geom_hline(yintercept = 4, linetype = "dashed", color = "#3182ce", linewidth = 1) +
    geom_hline(yintercept = 10, linetype = "dashed", color = "#e53e3e", linewidth = 1) +
    annotate("text", x = 0.6, y = 4.5, label = "Target ILRI lines: 1:4", color = "#3182ce", fontface = "bold", hjust = 0, size = 3.5) +
    annotate("text", x = 0.6, y = 10.5, label = "Target Tilili: 1:10", color = "#e53e3e", fontface = "bold", hjust = 0, size = 3.5) +
    geom_text(aes(label = Mating_Ratio_Label), position = position_dodge(width = 0.7), 
              vjust = -0.5, size = 3.5, fontface = "bold") +
    scale_fill_manual(values = line_colors) +
    labs(title = "Observed Mating Ratio by Line and Generation",
         subtitle = "Target: 1:4 for ILRI lines (blue) | 1:10 for Tilili (red)",
         x = "Generation", y = "Females per Male", fill = "Line") +
    theme_report() +
    scale_y_continuous(expand = expansion(mult = c(0, 0.15)))
}

⚠️ Mating Ratio Deviations

If observed mating ratios deviate significantly from targets:

For ILRI Lines (Target 1:4):

Higher ratio (e.g., 1:6, 1:8): Each male serves more females, increasing inbreeding risk
Lower ratio (e.g., 1:2): Fewer offspring per male, reduced selection intensity

For Tilili (Target 1:10):

Higher ratio (e.g., 1:15): May exceed male’s reproductive capacity, reduce fertility
Lower ratio (e.g., 1:5): Higher male cost, deviates from smallholder practice

Recommendations:

ILRI Lines: Maintain 1:4 ratio for controlled selection and genetic diversity
Tilili: Maintain 1:10 ratio to simulate smallholder conditions and natural mating

4 Phenotypic Summary

4.1 Population Structure

gen_cols <- c("G0", "G1", "G2", "G3", "G4", "G5")
gen_with_data <- gen_cols[colSums(pheno_stats[, gen_cols]) > 0]

display_data <- pheno_stats %>% select(Line, Total, all_of(gen_with_data), Survival_Pct)

totals_row <- data.frame(Line = "TOTAL", Total = sum(pheno_stats$Total), stringsAsFactors = FALSE)
for (g in gen_with_data) { totals_row[[g]] <- sum(pheno_stats[[g]]) }
totals_row$Survival_Pct <- round(mean(pheno_stats$Survival_Pct), 1)

display_data <- bind_rows(display_data, totals_row)

display_data %>%
  kable(col.names = c("Line", "Total", gen_with_data, "Survival %"),
        align = c("l", rep("r", length(gen_with_data) + 2))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white") %>%
  row_spec(6, bold = TRUE, background = "#f7fafc")

Line	Total	G0	G1	G2	G3	G4	G5	Survival %
LIL-20	1215	612	157	446	0	0	0	74.7
BIL-20	797	300	155	342	0	0	0	92.8
NHIL-20	1116	721	151	244	0	0	0	62.5
WIL-20	662	264	92	306	0	0	0	94.9
Tilili	5906	0	0	0	722	4112	1072	100.0
TOTAL	9696	1897	555	1338	722	4112	1072	85.0

4.2 Body Weight at 16 Weeks

pheno_stats %>%
  select(Line, BW16_N, BW16_Mean, BW16_SD, BW16_Min, BW16_Max, BW16_CV) %>%
  kable(col.names = c("Line", "N", "Mean (g)", "SD (g)", "Min (g)", "Max (g)", "CV %"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")

Line	N	Mean (g)	SD (g)	Min (g)	Max (g)	CV %
LIL-20	793	981	224	231	1535	22.8
BIL-20	686	1354	281	392	2254	20.8
NHIL-20	641	1196	409	336	2204	34.2
WIL-20	601	1148	272	359	1875	23.7
Tilili	3783	1073	303	110	2210	28.2

all_pheno <- bind_rows(
  lil_data %>% select(Line, BW_16wk), bil_data %>% select(Line, BW_16wk),
  nhil_data %>% select(Line, BW_16wk), wil_data %>% select(Line, BW_16wk),
  tilili_data %>% select(Line, BW_16wk)
)

ggplot(all_pheno %>% filter(!is.na(BW_16wk)), aes(x = Line, y = BW_16wk, fill = Line)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  scale_fill_manual(values = line_colors) +
  labs(title = "Body Weight at 16 Weeks - Comparison Across Lines", x = "Line", y = "Body Weight (g)") +
  theme_report() + theme(legend.position = "none")

4.3 Phenotypic Trend

pheno_trend <- bind_rows(
  lil_data %>% select(Line, Generation, BW_16wk),
  bil_data %>% select(Line, Generation, BW_16wk),
  nhil_data %>% select(Line, Generation, BW_16wk),
  wil_data %>% select(Line, Generation, BW_16wk),
  tilili_data %>% select(Line, Generation, BW_16wk)
) %>%
  filter(!is.na(BW_16wk)) %>%
  group_by(Line, Generation) %>%
  summarise(
    N = n(),
    Mean_BW16 = mean(BW_16wk, na.rm = TRUE),
    SE_BW16 = sd(BW_16wk, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation))

y_min <- floor(min(pheno_trend$Mean_BW16 - pheno_trend$SE_BW16, na.rm = TRUE) / 100) * 100 - 100
y_max <- ceiling(max(pheno_trend$Mean_BW16 + pheno_trend$SE_BW16, na.rm = TRUE) / 100) * 100 + 150

ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Mean_BW16 - SE_BW16, ymax = Mean_BW16 + SE_BW16), 
                width = 0.1, linewidth = 0.8) +
  geom_text(aes(label = round(Mean_BW16, 0)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
  scale_color_manual(values = line_colors) +
  labs(title = "Phenotypic Trend: Mean Body Weight at 16 Weeks by Generation",
       subtitle = "Error bars represent ± 1 standard error | Y-axis does not start at zero",
       x = "Generation", y = "Mean Body Weight (g)", color = "Line") +
  theme_report() +
  coord_cartesian(ylim = c(y_min, y_max))

4.4 Sexual Dimorphism

pheno_stats %>%
  select(Line, Male_BW16, Female_BW16, Dimorphism_Pct) %>%
  kable(col.names = c("Line", "Male BW_16wk (g)", "Female BW_16wk (g)", "Dimorphism (%)"),
        align = c("l", rep("r", 3))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")

Line	Male BW_16wk (g)	Female BW_16wk (g)	Dimorphism (%)
LIL-20	1125	866	29.9
BIL-20	1494	1200	24.5
NHIL-20	1427	995	43.4
WIL-20	1282	996	28.7
Tilili	1210	956	26.6

5 Enhanced Inbreeding Analysis

🧬 Inbreeding Coefficient (F) Analysis

The inbreeding coefficient (F) measures the probability that two alleles at a locus are identical by descent. This analysis uses enhanced thresholds to identify various levels of inbreeding:

Inbreeding Thresholds:

Threshold	Equivalent Relationship	Risk Level
F > 0%	Any common ancestor	Monitoring
F ≥ 6.25%	First cousin mating	Low
F ≥ 12.5%	Half-sib mating	Moderate
F ≥ 25%	Full-sib mating	High

Rate of Inbreeding (ΔF): The increase in F per generation should be kept below 1% to maintain long-term genetic diversity.

5.1 Detailed Inbreeding Analysis by Line and Generation

inbreeding_detailed <- bind_rows(
  compute_inbreeding_detailed(ebv_all, lil_data, "LIL-20"),
  compute_inbreeding_detailed(ebv_all, bil_data, "BIL-20"),
  compute_inbreeding_detailed(ebv_all, nhil_data, "NHIL-20"),
  compute_inbreeding_detailed(ebv_all, wil_data, "WIL-20"),
  compute_inbreeding_detailed(ebv_all, tilili_data, "Tilili")
)

if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  inbreeding_detailed %>%
    select(Line, Generation, N_Total, N_F_gt_0, Pct_F_gt_0, N_F_ge_6.25, Pct_F_ge_6.25,
           N_F_ge_12.5, Pct_F_ge_12.5, N_F_ge_25, Pct_F_ge_25, Mean_F, Median_F, Max_F) %>%
    kable(col.names = c("Line", "Gen", "N", "N(F>0%)", "%(F>0%)", 
                        "N(F≥6.25%)", "%(F≥6.25%)", "N(F≥12.5%)", "%(F≥12.5%)", 
                        "N(F≥25%)", "%(F≥25%)", "Mean F%", "Median F%", "Max F%"),
          align = c("l", "l", rep("r", 12))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE, font_size = 14) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}

Line	Gen	N	N(F>0%)	%(F>0%)	N(F≥6.25%)	%(F≥6.25%)	N(F≥12.5%)	%(F≥12.5%)	N(F≥25%)	%(F≥25%)	Mean F%	Median F%	Max F%
LIL-20	G0	291	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
LIL-20	G1	134	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
LIL-20	G2	362	26	7.2	26	7.2	26	7.2	15	4.1	1.42	0.00	25.00
BIL-20	G0	269	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
BIL-20	G1	124	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
BIL-20	G2	292	75	25.7	75	25.7	75	25.7	0	0.0	3.21	0.00	12.50
NHIL-20	G0	333	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
NHIL-20	G1	93	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
NHIL-20	G2	215	59	27.4	59	27.4	59	27.4	9	4.2	3.95	0.00	25.00
WIL-20	G0	237	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
WIL-20	G1	87	0	0.0	0	0.0	0	0.0	0	0.0	0.00	0.00	0.00
WIL-20	G2	275	46	16.7	46	16.7	46	16.7	10	3.6	2.55	0.00	25.00
Tilili	G3	503	357	71.0	47	9.3	20	4.0	1	0.2	2.08	0.83	25.00
Tilili	G4	2546	2322	91.2	402	15.8	172	6.8	23	0.9	3.05	1.55	25.76
Tilili	G5	787	785	99.7	140	17.8	8	1.0	0	0.0	3.67	2.87	14.47

if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  ggplot(inbreeding_detailed, aes(x = Generation, y = Mean_F, color = Line, group = Line)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 4) +
    geom_hline(yintercept = 12.5, linetype = "dashed", color = "#ed8936", linewidth = 0.8) +
    annotate("text", x = 1, y = 13.5, label = "Half-sib level (12.5%)", color = "#ed8936", 
             fontface = "italic", hjust = 0, size = 3.5) +
    scale_color_manual(values = line_colors) +
    labs(title = "Mean Inbreeding Coefficient by Line and Generation",
         subtitle = "Dashed line indicates half-sib mating level (F = 12.5%)",
         x = "Generation", y = "Mean Inbreeding Coefficient (F%)", color = "Line") +
    theme_report() +
    scale_y_continuous(limits = c(0, max(inbreeding_detailed$Mean_F, na.rm = TRUE) * 1.2))
}

5.2 Inbreeding Summary by Line

if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  inbreeding_summary <- inbreeding_detailed %>%
    group_by(Line) %>%
    summarise(
      N_Total_sum = sum(N_Total, na.rm = TRUE),
      # Calculate weighted mean manually: sum(x*w) / sum(w)
      Mean_F = round(sum(Mean_F * N_Total, na.rm = TRUE) / sum(N_Total, na.rm = TRUE), 2),
      Max_F = round(max(Max_F, na.rm = TRUE), 2),
      N_F_gt_0 = sum(N_F_gt_0, na.rm = TRUE),
      N_F_ge_12.5 = sum(N_F_ge_12.5, na.rm = TRUE),
      N_F_ge_25 = sum(N_F_ge_25, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    rename(N_Total = N_Total_sum) %>%
    mutate(
      Pct_F_gt_0 = round(N_F_gt_0 / N_Total * 100, 1),
      Pct_F_ge_12.5 = round(N_F_ge_12.5 / N_Total * 100, 1),
      Pct_F_ge_25 = round(N_F_ge_25 / N_Total * 100, 1)
    )
  
  inbreeding_summary %>%
    select(Line, N_Total, Mean_F, Max_F, Pct_F_gt_0, Pct_F_ge_12.5, Pct_F_ge_25) %>%
    kable(col.names = c("Line", "N Total", "Mean F%", "Max F%", "% with F>0%", "% with F≥12.5%", "% with F≥25%"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}

Line	N Total	Mean F%	Max F%	% with F>0%	% with F≥12.5%	% with F≥25%
BIL-20	685	1.37	12.50	10.9	10.9	0.0
LIL-20	787	0.65	25.00	3.3	3.3	1.9
NHIL-20	641	1.32	25.00	9.2	9.2	1.4
Tilili	3836	3.05	25.76	90.3	5.2	0.6
WIL-20	599	1.17	25.00	7.7	7.7	1.7

5.3 Population Inbreeding Distribution by Line

# Create inbreeding category distribution for all lines
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  
  # Categorize inbreeding levels
  ebv_with_categories <- ebv_all %>%
    filter(!is.na(Inbreeding_pct)) %>%
    mutate(
      F_Category = case_when(
        Inbreeding_pct == 0 ~ "Non-inbred (F = 0%)",
        Inbreeding_pct < 3.125 ~ "Below 2nd cousin (F < 3.125%)",
        Inbreeding_pct < 6.25 ~ "2nd cousin level (F < 6.25%)",
        Inbreeding_pct < 12.5 ~ "1st cousin level (F < 12.5%)",
        Inbreeding_pct < 25 ~ "Half-sib level (F < 25%)",
        TRUE ~ "Full-sib level (F ≥ 25%)"
      ),
      F_Category = factor(F_Category, levels = c(
        "Non-inbred (F = 0%)",
        "Below 2nd cousin (F < 3.125%)",
        "2nd cousin level (F < 6.25%)",
        "1st cousin level (F < 12.5%)",
        "Half-sib level (F < 25%)",
        "Full-sib level (F ≥ 25%)"
      ))
    )
  
  # Summary by line and category
  inbreeding_dist <- ebv_with_categories %>%
    group_by(Line, F_Category) %>%
    summarise(N_Animals = n(), .groups = "drop") %>%
    group_by(Line) %>%
    mutate(
      Percent = round(N_Animals / sum(N_Animals) * 100, 1),
      Cumulative = round(cumsum(N_Animals) / sum(N_Animals) * 100, 1)
    ) %>%
    ungroup()
  
  # Display for each line
  for (line_name in unique(inbreeding_dist$Line)) {
    cat("\n\n### ", line_name, " - Inbreeding Distribution\n\n", sep = "")
    
    line_dist <- inbreeding_dist %>% filter(Line == line_name)
    
    tbl <- line_dist %>%
      select(F_Category, N_Animals, Percent, Cumulative) %>%
      kable(col.names = c("Inbreeding Category", "N Animals", "Percent (%)", "Cumulative (%)"),
            align = c("l", rep("r", 3)), format = "html", escape = FALSE) %>%
      kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
      row_spec(0, bold = TRUE, background = line_colors[line_name], color = "white")
    
    cat(as.character(tbl))
    cat("\n\n")
  }
}

5.3.1 BIL-20 - Inbreeding Distribution

Inbreeding Category	N Animals	Percent (%)	Cumulative (%)
Non-inbred (F = 0%)	610	89.1	89.1
Half-sib level (F < 25%)	75	10.9	100.0

5.3.2 LIL-20 - Inbreeding Distribution

Inbreeding Category	N Animals	Percent (%)	Cumulative (%)
Non-inbred (F = 0%)	772	96.7	96.7
Half-sib level (F < 25%)	11	1.4	98.1
Full-sib level (F ≥ 25%)	15	1.9	100.0

5.3.3 NHIL-20 - Inbreeding Distribution

Inbreeding Category	N Animals	Percent (%)	Cumulative (%)
Non-inbred (F = 0%)	582	90.8	90.8
Half-sib level (F < 25%)	50	7.8	98.6
Full-sib level (F ≥ 25%)	9	1.4	100.0

5.3.4 Tilili - Inbreeding Distribution

Inbreeding Category	N Animals	Percent (%)	Cumulative (%)
Non-inbred (F = 0%)	408	10.5	10.5
Below 2nd cousin (F < 3.125%)	2233	57.5	68.0
2nd cousin level (F < 6.25%)	647	16.7	84.7
1st cousin level (F < 12.5%)	394	10.1	94.8
Half-sib level (F < 25%)	176	4.5	99.4
Full-sib level (F ≥ 25%)	24	0.6	100.0

5.3.5 WIL-20 - Inbreeding Distribution

Inbreeding Category	N Animals	Percent (%)	Cumulative (%)
Non-inbred (F = 0%)	554	92.3	92.3
Half-sib level (F < 25%)	36	6.0	98.3
Full-sib level (F ≥ 25%)	10	1.7	100.0

5.4 Inbreeding Coefficient Distribution by Line

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_inbreeding <- ebv_all %>% filter(!is.na(Inbreeding_pct))
  
  ggplot(ebv_inbreeding, aes(x = Inbreeding_pct, fill = Line)) +
    geom_histogram(bins = 30, color = "white", alpha = 0.8) +
    geom_vline(xintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +
    geom_vline(xintercept = 6.25, linetype = "dashed", color = "#ed8936", linewidth = 0.8) +
    geom_vline(xintercept = 12.5, linetype = "dashed", color = "#e53e3e", linewidth = 0.8) +
    geom_vline(xintercept = 25, linetype = "dashed", color = "#9b2c2c", linewidth = 0.8) +
    facet_wrap(~Line, scales = "free_y", ncol = 2) +
    scale_fill_manual(values = line_colors) +
    labs(
      title = "Inbreeding Coefficient Distribution by Line",
      subtitle = "Dashed lines: Orange = 1st cousin (6.25%) | Red = Half-sib (12.5%) | Dark red = Full-sib (25%)",
      x = "Inbreeding Coefficient (F%)",
      y = "Number of Animals"
    ) +
    theme_report() +
    theme(legend.position = "none")
}

📋 Inbreeding Management Guidelines

Based on the inbreeding analysis across all lines:

Priority	Recommendation
1. Selection	Prioritize non-inbred or distantly related animals (F < 3.125%) for breeding decisions
2. Caution	Limit use of half-sib level animals (F ≥ 12.5%) as breeding candidates
3. Avoid	Avoid using full-sib level animals (F ≥ 25%) as parents to prevent inbreeding depression
4. Monitor	Monitor effective population size (Ne) to prevent future inbreeding accumulation
5. Optimize	Implement Optimal Contribution Selection (OCS) using MateSelect to balance genetic gain and diversity

Key thresholds to monitor:

F < 3.125%: Distantly related - preferred for breeding
F = 6.25%: First cousin level - acceptable but monitor
F = 12.5%: Half-sib level - limit usage
F = 25%: Full-sib level - avoid for breeding
ΔF < 1%: Target rate of inbreeding increase per generation

⚠️ Inbreeding Management Recommendations

Based on the analysis above:

Monitor lines with Mean F > 5%: These require careful mate selection to avoid further increases
Avoid mating relatives: Use pedigree information to prevent half-sib or closer matings
Maintain target mating ratios: 1:4 for ILRI lines and 1:10 for Tilili to distribute genetic contributions appropriately
Target ΔF < 1% per generation: Keep the rate of inbreeding accumulation low
Consider rotational mating: Systematically rotate sire usage across generations

6 Genetic Analysis

6.1 Variance Components and Heritability

ℹ️ Analysis Method

Genetic parameters were estimated using WOMBAT software with an animal model. The model included fixed effects: Sex, Batch, Year, Month. Trait: body weight at 16 weeks.

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  vc_summary %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "h²", "SE(h²)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white") %>%
    row_spec(which(vc_summary$h2 > 0.6), bold = TRUE, background = "#fed7d7")
}

	Line	N	Va	SE(Va)	Ve	SE(Ve)	Vp	h²	SE(h²)
LIL	LIL-20	787	19579.0	4163.9	14212.6	2787.6	33791.6	0.579	0.097
BIL	BIL-20	685	12995.1	5250.1	30647.0	4169.6	43642.2	0.298	0.110
NHIL	NHIL-20	638	89703.6	0.0	28389.8	4796.9	118093.0	0.760	0.031
WIL	WIL-20	599	15082.8	5071.0	24419.8	3824.7	39502.6	0.382	0.112
Tilili	Tilili	3710	12914.1	2674.6	52893.5	2018.5	65807.5	0.196	0.037

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  ggplot(vc_summary, aes(x = Line, y = h2, fill = Line)) +
    geom_bar(stat = "identity", width = 0.6) +
    geom_errorbar(aes(ymin = h2 - h2_SE, ymax = h2 + h2_SE), width = 0.2) +
    geom_text(aes(label = h2), vjust = -0.8, size = 5, fontface = "bold") +
    geom_hline(yintercept = 0.45, linetype = "dashed", color = "orange", linewidth = 1) +
    annotate("text", x = 0.6, y = 0.47, label = "Typical upper range (h² = 0.45)", hjust = 0, color = "orange", size = 3.5) +
    scale_fill_manual(values = line_colors) +
    labs(title = "Heritability Estimates for Body Weight at 16 Weeks",
         subtitle = "Error bars show ± 1 SE | Dashed line indicates typical upper range",
         x = "Line", y = "Heritability (h²)") +
    theme_report() + theme(legend.position = "none") +
    scale_y_continuous(limits = c(0, max(vc_summary$h2 + vc_summary$h2_SE, na.rm = TRUE) * 1.2))
}

6.2 Breeding Structure Analysis

6.2.1 Number of Sires and Dams by Generation

parents_all <- bind_rows(
  compute_parents_by_gen(lil_data, "LIL-20"),
  compute_parents_by_gen(bil_data, "BIL-20"),
  compute_parents_by_gen(nhil_data, "NHIL-20"),
  compute_parents_by_gen(wil_data, "WIL-20"),
  compute_parents_by_gen(tilili_data, "Tilili")
)

parents_wide <- parents_all %>%
  pivot_wider(names_from = Generation, values_from = c(N_Sires, N_Dams), names_sep = "_")

parents_wide %>%
  kable(align = c("l", rep("r", ncol(parents_wide) - 1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")

Line	N_Sires_G1	N_Sires_G2	N_Sires_G3	N_Sires_G4	N_Sires_G5	N_Dams_G1	N_Dams_G2	N_Dams_G3	N_Dams_G4	N_Dams_G5
LIL-20	11	12	NA	NA	NA	22	43	NA	NA	NA
BIL-20	6	8	NA	NA	NA	27	39	NA	NA	NA
NHIL-20	5	6	NA	NA	NA	24	27	NA	NA	NA
WIL-20	7	9	NA	NA	NA	24	23	NA	NA	NA
Tilili	NA	NA	48	44	16	NA	NA	212	289	87

6.2.2 Sire EBV Summary by Generation

sire_ebv_by_gen <- bind_rows(
  compute_sire_ebv_by_gen(lil_data, ebv_all, "LIL-20"),
  compute_sire_ebv_by_gen(bil_data, ebv_all, "BIL-20"),
  compute_sire_ebv_by_gen(nhil_data, ebv_all, "NHIL-20"),
  compute_sire_ebv_by_gen(wil_data, ebv_all, "WIL-20"),
  compute_sire_ebv_by_gen(tilili_data, ebv_all, "Tilili")
)

if (!is.null(sire_ebv_by_gen) && nrow(sire_ebv_by_gen) > 0) {
  sire_ebv_by_gen %>%
    kable(col.names = c("Line", "Generation", "N Sires", "Sires with EBV", "N Offspring", 
                        "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)"),
          align = c("l", "l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white")
}

Line	Generation	N Sires	Sires with EBV	N Offspring	Mean EBV (g)	SD (g)	Min (g)	Max (g)
LIL-20	G1	11	11	157	114.3	72.5	-58.3	215.0
LIL-20	G2	12	12	446	171.5	64.8	70.2	276.3
BIL-20	G1	6	6	155	65.9	57.7	-21.2	143.8
BIL-20	G2	8	8	342	53.0	66.7	-61.8	113.5
NHIL-20	G1	5	5	151	348.2	148.9	218.4	578.0
NHIL-20	G2	6	6	244	448.7	122.0	310.2	634.7
WIL-20	G1	7	7	92	94.4	76.0	-9.2	224.6
WIL-20	G2	9	9	306	110.3	87.8	12.2	269.1
Tilili	G3	48	26	722	16.8	72.3	-82.1	169.0
Tilili	G4	44	29	4112	38.1	75.1	-126.9	220.6
Tilili	G5	16	14	1072	92.4	96.9	-82.4	255.7

6.3 EBV Summary Statistics

if (!is.null(ebv_stats) && nrow(ebv_stats) > 0) {
  ebv_stats %>%
    kable(col.names = c("Line", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)", "Mean F%", "N Inbred"),
          align = c("l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
}

Line	N	Mean EBV (g)	SD (g)	Min (g)	Max (g)	Mean F%	N Inbred
BIL-20	685	35.7	67.7	-217.2	207.8	1.37	75
LIL-20	798	71.1	111.9	-307.5	315.8	0.64	26
NHIL-20	641	175.6	298.0	-668.4	842.7	1.33	59
Tilili	3882	43.7	74.8	-182.3	312.6	3.03	3474
WIL-20	600	57.4	95.8	-243.0	293.7	1.17	46

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ggplot(ebv_all, aes(x = EBV, fill = Line)) +
    geom_histogram(bins = 30, alpha = 0.8, color = "white") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
    facet_wrap(~Line, scales = "free_y", ncol = 2) +
    scale_fill_manual(values = line_colors) +
    labs(title = "EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Red dashed line indicates EBV = 0", x = "EBV (g)", y = "Count") +
    theme_report() + theme(legend.position = "none")
}

6.4 Genetic Trend

all_pheno_with_id <- bind_rows(
  lil_data %>% select(Line, ID, Generation, BW_16wk),
  bil_data %>% select(Line, ID, Generation, BW_16wk),
  nhil_data %>% select(Line, ID, Generation, BW_16wk),
  wil_data %>% select(Line, ID, Generation, BW_16wk),
  tilili_data %>% select(Line, ID, Generation, BW_16wk)
)

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_with_gen <- ebv_all %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(all_pheno_with_id %>% select(Line, ID, Generation) %>% mutate(ID = as.character(ID)), 
              by = c("Line" = "Line", "ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  genetic_trend <- ebv_with_gen %>%
    group_by(Line, Generation) %>%
    summarise(N = n(), Mean_EBV = mean(EBV, na.rm = TRUE),
              SE_EBV = sd(EBV, na.rm = TRUE) / sqrt(n()), .groups = "drop") %>%
    mutate(Generation = paste0("G", Generation))
}

if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) + geom_point(size = 4) +
    geom_errorbar(aes(ymin = Mean_EBV - SE_EBV, ymax = Mean_EBV + SE_EBV), width = 0.1, linewidth = 0.8) +
    geom_text(aes(label = round(Mean_EBV, 1)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
    scale_color_manual(values = line_colors) +
    labs(title = "Genetic Trend: Mean EBV by Generation",
         subtitle = "Error bars represent ± 1 SE",
         x = "Generation", y = "Mean EBV (g)", color = "Line") +
    theme_report() + scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)))
}

6.5 Top Breeding Animals

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_animals <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    mutate(Rank = row_number()) %>%
    ungroup() %>%
    select(Line, Rank, ID_original, EBV, Accuracy, Inbreeding_pct) %>%
    mutate(EBV = round(EBV, 1),
           Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2)),
           Inbreeding_pct = ifelse(is.na(Inbreeding_pct), "-", round(Inbreeding_pct, 2)))
  
  top_animals %>%
    kable(col.names = c("Line", "Rank", "Animal ID", "EBV (g)", "Accuracy", "Inbreeding %"),
          align = c("l", "c", "l", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(which(top_animals$Rank == 1), bold = TRUE, background = "#c6f6d5")
}

Line	Rank	Animal ID	EBV (g)	Accuracy	Inbreeding %
BIL-20	1	C-7908	207.8	0.71	12.50
BIL-20	2	1222	204.3	0.54	0.00
BIL-20	3	C-7918	192.8	0.65	0.00
BIL-20	4	B-3407	189.9	0.78	0.00
BIL-20	5	C-7943	188.5	0.66	0.00
BIL-20	6	C-3263	184.5	0.66	0.00
BIL-20	7	1426	183.2	0.54	0.00
BIL-20	8	C-7075	178.4	0.71	12.50
BIL-20	9	1287	174.6	0.54	0.00
BIL-20	10	B-3091	169.2	0.68	0.00
LIL-20	1	C-7742	315.8	0.80	0.00
LIL-20	2	C-7562	299.9	0.81	0.00
LIL-20	3	C-7660	297.5	0.81	0.00
LIL-20	4	C-7565	297.0	0.81	0.00
LIL-20	5	C-8084	292.5	0.80	0.00
LIL-20	6	C-8158	285.6	0.81	0.00
LIL-20	7	C-8142	283.4	0.80	0.00
LIL-20	8	C-8147	281.1	0.81	0.00
LIL-20	9	C-7719	277.3	0.80	0.00
LIL-20	10	B-3323	276.3	0.91	0.00
NHIL-20	1	C-7171	842.7	0.90	25.00
NHIL-20	2	C-7136	842.5	0.89	12.50
NHIL-20	3	C-7178	805.5	0.87	0.00
NHIL-20	4	C-7180	784.1	0.87	0.00
NHIL-20	5	C-7981	774.7	0.89	12.50
NHIL-20	6	C-7137	756.8	0.89	12.50
NHIL-20	7	C-7128	727.8	0.87	0.00
NHIL-20	8	C-7181	724.8	0.87	0.00
NHIL-20	9	2384	717.4	0.87	0.00
NHIL-20	10	C-7966	693.7	0.87	0.00
Tilili	1	6233	312.6	0.74	25.76
Tilili	2	6235	295.0	0.74	25.76
Tilili	3	2481	273.2	0.74	25.76
Tilili	4	2485	267.3	0.74	25.76
Tilili	5	1941	265.6	0.74	25.76
Tilili	6	1370	257.4	0.74	25.76
Tilili	7	1371	255.7	0.74	25.76
Tilili	8	257	255.7	0.83	0.98
Tilili	9	1054	254.2	0.74	25.76
Tilili	10	1055	253.2	0.74	25.76
WIL-20	1	C-7498	293.7	0.71	0.00
WIL-20	2	B-3346	288.2	0.78	0.00
WIL-20	3	C-8280	287.2	0.72	0.00
WIL-20	4	C-7497	277.2	0.71	0.00
WIL-20	5	C-7435	276.1	0.72	0.00
WIL-20	6	B-3236	269.1	0.86	0.00
WIL-20	7	C-3153	265.1	0.71	0.00
WIL-20	8	C-7510	263.7	0.72	0.00
WIL-20	9	B-3368	262.6	0.78	0.00
WIL-20	10	C-8283	261.2	0.72	0.00

7 NHIL-20 Detailed Analysis

🔍 NHIL-20 Requires Further Investigation

The NHIL-20 line shows an unusually wide EBV distribution compared to other lines:

EBV range: -668.4g to 842.7g (vs. ~500g range in other lines)
Heritability: 0.76 (higher than typical 0.25-0.45 for body weight)
High genetic variance: Va = 89,704 (2-7x higher than other lines)

This section provides a detailed breakdown to investigate potential population substructure that may be inflating heritability estimates.

7.1 NHIL-20 Breeding Structure

nhil_parents <- compute_parents_by_gen(nhil_data, "NHIL-20")

nhil_parents %>%
  kable(col.names = c("Line", "Generation", "N Sires", "N Dams"),
        align = c("l", "l", "r", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")

Line	Generation	N Sires	N Dams
NHIL-20	G1	5	24
NHIL-20	G2	6	27

7.2 NHIL-20 EBV Distribution by Generation

nhil_ebv_with_gen <- ebv_all %>%
  filter(Line == "NHIL-20") %>%
  mutate(ID_original = as.character(ID_original)) %>%
  left_join(nhil_data %>% select(ID, Generation) %>% mutate(ID = as.character(ID)),
            by = c("ID_original" = "ID")) %>%
  filter(!is.na(Generation)) %>%
  mutate(Generation = paste0("G", Generation))

nhil_ebv_by_gen <- nhil_ebv_with_gen %>%
  group_by(Generation) %>%
  summarise(N = n(), Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
            SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
            Min_EBV = round(min(EBV, na.rm = TRUE), 1),
            Max_EBV = round(max(EBV, na.rm = TRUE), 1), .groups = "drop")

nhil_ebv_by_gen %>%
  kable(col.names = c("Generation", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")

Generation	N	Mean EBV (g)	SD (g)	Min (g)	Max (g)
G0	333	-3.0	266.8	-668.4	717.4
G1	93	278.5	175.6	-280.3	656.2
G2	215	407.7	183.2	-307.5	842.7

ggplot(nhil_ebv_with_gen, aes(x = EBV, fill = Generation)) +
  geom_histogram(bins = 25, alpha = 0.8, color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  facet_wrap(~Generation, ncol = 3, scales = "free_y") +
  scale_fill_manual(values = c("G0" = "#3182ce", "G1" = "#38a169", "G2" = "#805ad5")) +
  labs(title = "NHIL-20: EBV Distribution by Generation",
       subtitle = "Red dashed line indicates EBV = 0", x = "EBV (g)", y = "Count") +
  theme_report() + theme(legend.position = "none")

7.3 NHIL-20 Population Substructure Detection (Gaussian Mixture Model)

nhil_ebv_data <- ebv_all %>%
  filter(Line == "NHIL-20") %>%
  mutate(ID_original = as.character(ID_original)) %>%
  left_join(nhil_data %>% 
              select(ID, Generation, Sire_ID, Dam_ID, Sex, BW_16wk) %>% 
              mutate(ID = as.character(ID)),
            by = c("ID_original" = "ID")) %>%
  filter(!is.na(EBV))

ebv_vector <- nhil_ebv_data$EBV
gmm_fit <- Mclust(ebv_vector, G = 1:6)
n_components <- gmm_fit$G
bic_values <- gmm_fit$BIC

cat("================================================================================\n")

## ================================================================================

cat("       GAUSSIAN MIXTURE MODEL RESULTS - NHIL-20\n")

##        GAUSSIAN MIXTURE MODEL RESULTS - NHIL-20

cat("================================================================================\n\n")

## ================================================================================

cat("Optimal number of components (subpopulations):", n_components, "\n\n")

## Optimal number of components (subpopulations): 2

nhil_ebv_data$Cluster <- as.factor(gmm_fit$classification)

if (gmm_fit$modelName %in% c("E", "V")) {
  cluster_sd <- sqrt(gmm_fit$parameters$variance$sigmasq)
  if (length(cluster_sd) == 1) { cluster_sd <- rep(cluster_sd, n_components) }
} else { cluster_sd <- rep(NA, n_components) }

cluster_params <- data.frame(
  Cluster = 1:n_components,
  Mean_EBV = round(gmm_fit$parameters$mean, 1),
  SD_EBV = round(cluster_sd, 1),
  Proportion = round(gmm_fit$parameters$pro * 100, 1)
)

cluster_params %>%
  kable(col.names = c("Cluster", "Mean EBV (g)", "SD (g)", "% of Population"),
        align = c("l", rep("r", 3)),
        caption = "GMM Cluster Parameters") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")

GMM Cluster Parameters
Cluster	Mean EBV (g)	SD (g)	% of Population
1	-151.1	185.2	33.8
2	342.1	185.2	66.2

ggplot(nhil_ebv_data, aes(x = EBV, fill = Cluster)) +
  geom_histogram(bins = 35, alpha = 0.8, color = "white", position = "identity") +
  geom_vline(xintercept = gmm_fit$parameters$mean, linetype = "dashed", 
             color = c("#3182ce", "#e53e3e", "#38a169", "#ed8936", "#805ad5", "#319795")[1:n_components],
             linewidth = 1) +
  scale_fill_manual(values = c("1" = "#3182ce", "2" = "#e53e3e", "3" = "#38a169", 
                                "4" = "#ed8936", "5" = "#805ad5", "6" = "#319795")) +
  labs(title = paste0("NHIL-20: Animals Classified into ", n_components, " Subpopulations"),
       subtitle = "Dashed lines indicate cluster means",
       x = "EBV (g)", y = "Count", fill = "Cluster") +
  theme_report()

🔍 GMM Analysis Conclusion for NHIL-20

Number of subpopulations detected: 2

if (n_components == 1) {
  cat("**Interpretation:** NHIL-20 appears to be a single homogeneous population.\n")
} else if (n_components == 2) {
  cat("**Interpretation:** NHIL-20 consists of **TWO distinct subpopulations**. This explains the bimodal EBV distribution and inflated heritability.\n\n")
  cat("**Recommendations:** Investigate the origin of animals in each cluster and consider analyzing each subpopulation separately.\n")
} else {
  cat(paste0("**Interpretation:** NHIL-20 consists of **", n_components, " distinct subpopulations**.\n"))
}

Interpretation: NHIL-20 consists of TWO distinct subpopulations. This explains the bimodal EBV distribution and inflated heritability.

Recommendations: Investigate the origin of animals in each cluster and consider analyzing each subpopulation separately.

8 Tilili In-Depth Analysis

🐔 About the Tilili Breeding Program

Tilili is a local Ethiopian chicken ecotype undergoing genetic improvement at ILRI to enhance productivity, climate resilience, and disease resistance while maintaining adaptation to smallholder scavenging conditions.

Program Phase	Details
Phase 1: Indoor Brooding	Weeks 0-9: Group pens, controlled conditions, wing tags for ID, weekly weights & health assessments, standardized nutrition (Sasso T451A guidelines)
Phase 2: Semi-Scavenging	Weeks 9-19: Outdoor paddocks, 80% restricted feeding to encourage foraging, evaluation of heat tolerance & foraging efficiency
Phase 3: Egg Production	Weeks 19-45: Selected birds transferred indoors, trap nest system for individual laying performance, monitoring fertility, hatchability & egg number
Breeding System	Natural mating (1:10 male:female ratio), controlled half-sib mating, culling of low performers (fertility & egg number), pedigree tracking
Selection Criteria	Body weight at 16 weeks, growth under semi-scavenging, disease resistance, climate adaptability, egg production traits
Target Environment	Smallholder farms with semi-scavenging systems

Key Evaluation Point: Body weight at 16 weeks represents market age for smallholder systems and concludes the semi-scavenging evaluation period.

8.1 Housing and Feed Management Protocol

📍 Three-Phase Breeding Protocol

Phase 1: Indoor Brooding (Weeks 0-9)

Chicks reared in group pens under controlled conditions at ILRI’s Addis Ababa facility
Individual wing tagging for lifetime identification
Standardized nutrition following Sasso T451A female guidelines
Weekly weight measurements and health assessments
Week 8: Blood collection (genotyping, antibody profiling), cloacal swabs (pathogen screening), photographic documentation

Phase 2: Semi-Scavenging Evaluation (Weeks 9-19)

Birds transition to outdoor paddocks after ID verification and baseline weight recording
Feeding regime: 80% of standard guidelines to encourage natural foraging behavior
Weekly monitoring: Body weight, health status, environmental adaptation (heat tolerance, foraging efficiency)
Dual treatment: Indoor-only birds remain in controlled housing for comparison

Phase 3: Egg Production & Breeding (Weeks 19-45)

Selected birds (based on growth, disease resistance, climate adaptability) transferred to indoor housing at 19 weeks
7-day acclimatization period to minimize stress
Trap nest system: Individual laying performance monitoring (automatic closing nest boxes)
Daily tracking: Egg weight, laying frequency, fertility rates, hatchability success
Breeding system: Natural mating at 1:10 male:female ratio, controlled half-sib mating for genetic diversity
Culling strategy: Less productive birds (low fertility, low egg number) replaced with new candidates
Detailed pedigree records maintained to prevent excessive inbreeding

8.1.1 Housing Management Summary

housing_protocol <- data.frame(
  Batch_Range = c("0-2", "3-13", "3-13"),
  Generation = c("G3", "G4", "G4-G5"),
  Treatment = c("Outdoor Only", "Indoor", "Outdoor"),
  Brooding_Phase = c("Indoor (Wk 0-9)", "Indoor (Wk 0-19)", "Indoor (Wk 0-9 or 0-11*)"),
  Outdoor_Phase = c("Outdoor (Wk 9-19)", "—", "Outdoor (Wk 9-19 or 11-19*)"),
  Release_Age = c("Week 9 (Day 63)", "—", "Week 9 or 11* (Day 63 or 77)"),
  Notes = c(
    "Pilot batches: outdoor only",
    "Entire growth period indoors",
    "*G5 released at Week 11"
  )
)

housing_protocol %>%
  kable(col.names = c("Batch Range", "Generation", "Treatment", "Brooding Phase", "Outdoor Phase", "Release Age", "Notes"),
        align = c("c", "c", "c", "l", "l", "c", "l")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white") %>%
  column_spec(3, bold = TRUE, color = "#285e61")

Batch Range	Generation	Treatment	Brooding Phase	Outdoor Phase	Release Age	Notes
0-2	G3	Outdoor Only	Indoor (Wk 0-9)	Outdoor (Wk 9-19)	Week 9 (Day 63)	Pilot batches: outdoor only
3-13	G4	Indoor	Indoor (Wk 0-19)	—	—	Entire growth period indoors
3-13	G4-G5	Outdoor	Indoor (Wk 0-9 or 0-11*)	Outdoor (Wk 9-19 or 11-19*)	Week 9 or 11* (Day 63 or 77)	*G5 released at Week 11

# Load feed intake data from Excel
feed_file <- "Analysis_with_wombat2/TililiFeed/Feed offered G345.xlsx"

# Initialize empty data frames in case loading fails
feed_long <- data.frame()
feed_summary_16wk <- data.frame()
feed_summary_gen <- data.frame()

# Try to load feed data
tryCatch({
  if (file.exists(feed_file)) {
    feed_raw <- read_excel(feed_file, sheet = "Sheet1")
    
    # Reshape from wide to long format
    feed_long <- feed_raw %>%
      rename(Environment = Enviroment) %>%
      pivot_longer(cols = starts_with("Week"), 
                   names_to = "Week", 
                   values_to = "Feed_g_per_bird_per_day") %>%
      mutate(
        Week_num = as.numeric(gsub("Week", "", Week)),
        Generation = paste0("G", Generation),
        Batch = as.character(Batch),
        Feed_kg_per_bird_per_week = (Feed_g_per_bird_per_day * 7) / 1000
      )
    
    # Summary: Total feed offered per bird (Weeks 1-16)
    feed_summary_16wk <- feed_long %>%
      filter(Week_num <= 16) %>%
      group_by(Generation, Batch, Environment) %>%
      summarise(
        Total_Feed_kg_per_bird = sum(Feed_kg_per_bird_per_week, na.rm = TRUE),
        Avg_Daily_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
        .groups = "drop"
      ) %>%
      arrange(Generation, Batch, Environment)
    
    # Overall summary by generation and environment
    feed_summary_gen <- feed_long %>%
      filter(Week_num <= 16) %>%
      group_by(Generation, Environment) %>%
      summarise(
        N_Batch_Treatment = n_distinct(Batch),
        Avg_Total_Feed_kg_per_bird = mean((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        Min_Feed_kg = min((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        Max_Feed_kg = max((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        .groups = "drop"
      )
  }
}, error = function(e) {
  cat("Warning: Could not load feed data.\n")
})

8.1.2 Feed Intake Summary (Weeks 1-16)

if (nrow(feed_summary_gen) > 0) {
  feed_summary_gen %>%
    mutate(
      Avg_Total_Feed_kg_per_bird = round(Avg_Total_Feed_kg_per_bird, 2),
      Min_Feed_kg = round(Min_Feed_kg, 2),
      Max_Feed_kg = round(Max_Feed_kg, 2)
    ) %>%
    kable(col.names = c("Generation", "Environment", "N Batches", 
                        "Mean Feed (kg/bird)", "Min (kg)", "Max (kg)"),
          align = c("c", "c", "c", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white") %>%
    column_spec(4, bold = TRUE, color = "#234e52", background = "#e6fffa")
} else {
  cat("Feed summary data not available.\n")
}

Generation	Environment	N Batches	Mean Feed (kg/bird)	Min (kg)	Max (kg)
G3	Outdoor	3	4.98	1.23	8.96
G4	Indoor	11	5.16	1.23	8.29
G4	Outdoor	11	4.42	1.23	6.72
G5	Outdoor	4	5.00	1.23	8.20

8.1.3 Feed Allowance by Week

# Calculate mean feed by generation and week
if (nrow(feed_long) > 0) {
  feed_weekly <- feed_long %>%
    filter(Week_num <= 19) %>%
    group_by(Generation, Week_num) %>%
    summarise(
      Mean_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
      Min_Feed_g = min(Feed_g_per_bird_per_day, na.rm = TRUE),
      Max_Feed_g = max(Feed_g_per_bird_per_day, na.rm = TRUE),
      .groups = "drop"
    )
  
  # Check if we have valid data
  if (nrow(feed_weekly) > 0 && !all(is.na(feed_weekly$Max_Feed_g))) {
    # Calculate y-axis maximum safely
    y_max <- max(feed_weekly$Max_Feed_g, na.rm = TRUE)
    y_breaks <- seq(0, ceiling(y_max / 20) * 20 + 20, by = 20)
    
    ggplot(feed_weekly, aes(x = Week_num, y = Mean_Feed_g, color = Generation, group = Generation)) +
      geom_line(linewidth = 1.2) +
      geom_point(size = 2.5) +
      geom_ribbon(aes(ymin = Min_Feed_g, ymax = Max_Feed_g, fill = Generation), alpha = 0.15, color = NA) +
      geom_vline(xintercept = 16, linetype = "dashed", color = "#CC3311", linewidth = 0.8) +
      annotate("text", x = 16.5, y = y_max * 0.95, 
               label = "Week 16\n(Evaluation)", hjust = 0, size = 3.5, color = "#CC3311", fontface = "bold") +
      scale_color_manual(values = gen_colors) +
      scale_fill_manual(values = gen_colors) +
      scale_x_continuous(breaks = seq(1, 19, by = 2)) +
      scale_y_continuous(breaks = y_breaks) +
      labs(
        title = "Feed Allowance Schedule by Generation (Weeks 1-19)",
        subtitle = "Shaded area shows range across batches/environments | G4 & G5 receive 1.2× baseline (G3)",
        x = "Week of Age",
        y = "Feed Allowance (g/bird/day)",
        color = "Generation",
        fill = "Generation"
      ) +
      theme_tilili()
  } else {
    cat("Note: Feed data not available for plotting.\n")
  }
} else {
  cat("Note: Feed data not available.\n")
}

# Feed by environment and generation
if (nrow(feed_long) > 0) {
  feed_env <- feed_long %>%
    filter(Week_num <= 16) %>%
    group_by(Generation, Environment, Week_num) %>%
    summarise(
      Mean_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
      .groups = "drop"
    )
  
  ggplot(feed_env, aes(x = Week_num, y = Mean_Feed_g, color = Environment, linetype = Generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2) +
    scale_color_manual(values = c("Indoor" = "#3182ce", "Outdoor" = "#38a169")) +
    scale_x_continuous(breaks = seq(1, 16, by = 2)) +
    labs(
      title = "Feed Allowance by Environment and Generation (Weeks 1-16)",
      subtitle = "Indoor vs. Outdoor treatments | Outdoor birds supplement through foraging",
      x = "Week of Age",
      y = "Feed Allowance (g/bird/day)",
      color = "Environment",
      linetype = "Generation"
    ) +
    theme_tilili()
} else {
  cat("Note: Feed data not available for environment comparison plot.\n")
}

📊 Feed Conversion and Growth Efficiency

The feed allowance data presented here represents the offered feed per bird per day. Actual feed intake and feed conversion ratio (FCR) would require measurement of:

Actual consumption: Feed disappearance (offered - refused/spillage)
Body weight gain: Weekly weight measurements
FCR calculation: Feed consumed (kg) ÷ Body weight gain (kg)

For outdoor birds, foraging contribution is not quantified but is expected to:

Reduce dependence on provided supplemental feed
Provide micronutrients (insects, greens)
Improve gut health and immune function
Lower feed costs for smallholder farmers

Future recommendations: Implement feed intake recording systems to estimate FCR by generation and housing system for economic evaluation.

✅ Key Feed Observations

Generation 3 baseline: Lower feed allowance as these were layer-type birds (no 1.2× multiplier)
Generations 4 & 5: Higher feed provision (1.2× baseline) to support dual-purpose genetics
Environment effect: Outdoor birds received 80% of standard allowance while indoor birds received 100% (ad libitum)
Foraging contribution: Outdoor birds compensate the 20% feed restriction through natural foraging (insects, seeds, vegetation)
Evaluation age: All birds assessed at Week 16 for body weight

8.2 Tilili Body Weight Genetic Parameters

tilili_summary <- pheno_stats %>% filter(Line == "Tilili")
tilili_vc_data <- if(!is.null(vc_summary)) vc_summary %>% filter(Line == "Tilili") else NULL

# Tilili-specific stats
n_total_tilili <- nrow(tilili_data)
n_with_bw_tilili <- sum(!is.na(tilili_data$BW_16wk))
mean_bw_tilili <- round(mean(tilili_data$BW_16wk, na.rm = TRUE), 0)
sd_bw_tilili <- round(sd(tilili_data$BW_16wk, na.rm = TRUE), 0)
cv_bw_tilili <- round(sd_bw_tilili / mean_bw_tilili * 100, 1)

male_bw_tilili <- round(mean(tilili_data$BW_16wk[tilili_data$Sex == "M"], na.rm = TRUE), 0)
female_bw_tilili <- round(mean(tilili_data$BW_16wk[tilili_data$Sex == "F"], na.rm = TRUE), 0)
dimorphism_tilili <- round((male_bw_tilili - female_bw_tilili) / female_bw_tilili * 100, 1)

gen_counts_tilili <- table(tilili_data$Generation)

# Get heritability for Tilili
h2_tilili <- if(!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) tilili_vc_data$h2 else NA
h2_SE_tilili <- if(!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) tilili_vc_data$h2_SE else NA

5,906

Total Birds

3,783

With BW_16wk

1073g

Mean Weight

0.196

Heritability

Generations

8.2.1 Phenotypic Summary

data.frame(
  Metric = c("Total Birds", "Birds with BW_16wk", "Mean BW_16wk (g)", "SD BW_16wk (g)", 
             "CV (%)", "Min BW_16wk (g)", "Max BW_16wk (g)",
             "Male BW_16wk (g)", "Female BW_16wk (g)", "Sexual Dimorphism (%)"),
  Value = c(n_total_tilili, n_with_bw_tilili, mean_bw_tilili, sd_bw_tilili, cv_bw_tilili,
            round(min(tilili_data$BW_16wk, na.rm = TRUE), 0),
            round(max(tilili_data$BW_16wk, na.rm = TRUE), 0),
            male_bw_tilili, female_bw_tilili, dimorphism_tilili)
) %>%
  kable(align = c("l", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")

Metric	Value
Total Birds	5906.0
Birds with BW_16wk	3783.0
Mean BW_16wk (g)	1073.0
SD BW_16wk (g)	303.0
CV (%)	28.2
Min BW_16wk (g)	110.0
Max BW_16wk (g)	2210.0
Male BW_16wk (g)	1210.0
Female BW_16wk (g)	956.0
Sexual Dimorphism (%)	26.6

8.2.2 Population Structure by Generation

tilili_data %>%
  group_by(Generation) %>%
  summarise(
    N = n(),
    N_BW = sum(!is.na(BW_16wk)),
    Mean_BW = round(mean(BW_16wk, na.rm = TRUE), 0),
    SD_BW = round(sd(BW_16wk, na.rm = TRUE), 0),
    N_Sires = n_distinct(Sire_ID[!is.na(Sire_ID)]),
    N_Dams = n_distinct(Dam_ID[!is.na(Dam_ID)]),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation)) %>%
  kable(col.names = c("Generation", "N Total", "N with BW", "Mean BW (g)", "SD (g)", "N Sires", "N Dams"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")

Generation	N Total	N with BW	Mean BW (g)	SD (g)	N Sires	N Dams
G3	722	458	1188	343	48	212
G4	4112	2537	1014	279	44	289
G5	1072	788	1198	295	16	87

tilili_data %>%
  filter(!is.na(BW_16wk)) %>%
  mutate(Generation = paste0("G", Generation)) %>%
  ggplot(aes(x = Generation, y = BW_16wk, fill = Generation)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  geom_jitter(alpha = 0.2, width = 0.2, size = 0.8) +
  scale_fill_manual(values = gen_colors) +
  labs(title = "Tilili: Body Weight at 16 Weeks by Generation",
       x = "Generation", y = "Body Weight (g)") +
  theme_tilili() + theme(legend.position = "none")

8.2.3 Variance Components and Heritability

if (!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) {
  tilili_vc_data %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "h²", "SE(h²)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
} else {
  cat("Variance component results not available for Tilili.")
}

	Line	N	Va	SE(Va)	Ve	SE(Ve)	Vp	h²	SE(h²)
Tilili	Tilili	3710	12914.1	2674.6	52893.5	2018.5	65807.5	0.196	0.037

8.2.4 Tilili EBV Summary

tilili_ebv <- ebv_all %>% filter(Line == "Tilili")

if (nrow(tilili_ebv) > 0) {
  data.frame(
    Metric = c("N Animals with EBV", "Mean EBV (g)", "SD EBV (g)", 
               "Min EBV (g)", "Max EBV (g)", "EBV Range (g)"),
    Value = c(nrow(tilili_ebv),
              round(mean(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(sd(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(min(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(max(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(max(tilili_ebv$EBV, na.rm = TRUE) - min(tilili_ebv$EBV, na.rm = TRUE), 1))
  ) %>%
    kable(align = c("l", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}

Metric	Value
N Animals with EBV	3882.0
Mean EBV (g)	43.7
SD EBV (g)	74.8
Min EBV (g)	-182.3
Max EBV (g)	312.6
EBV Range (g)	494.9

if (nrow(tilili_ebv) > 0) {
  ggplot(tilili_ebv, aes(x = EBV)) +
    geom_histogram(bins = 30, fill = "#319795", color = "white", alpha = 0.8) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) +
    geom_vline(xintercept = mean(tilili_ebv$EBV, na.rm = TRUE), 
               linetype = "solid", color = "#EE7733", linewidth = 1) +
    labs(title = "Tilili: EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Black dashed = 0 | Orange solid = Mean EBV",
         x = "EBV (g)", y = "Count") +
    theme_tilili()
}

8.2.5 Top 10 Tilili Breeding Animals (by EBV)

if (nrow(tilili_ebv) > 0) {
  tilili_ebv %>%
    arrange(desc(EBV)) %>%
    head(10) %>%
    mutate(
      Rank = 1:10,
      EBV = round(EBV, 1),
      SE_EBV = ifelse(is.na(SE_EBV), "-", round(SE_EBV, 1)),
      Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2))
    ) %>%
    select(Rank, ID_original, EBV, SE_EBV, Accuracy) %>%
    kable(col.names = c("Rank", "Animal ID", "EBV (g)", "SE", "Accuracy"),
          align = c("c", "l", rep("r", 3))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(1, bold = TRUE, background = "#c6f6d5")
}

	Rank	Animal ID	EBV (g)	SE	Accuracy
Tilili.1733	1	6233	312.6	85.3	0.74
Tilili.1732	2	6235	295.0	85.3	0.74
Tilili.2637	3	2481	273.2	85.3	0.74
Tilili.2633	4	2485	267.3	85.3	0.74
Tilili.3075	5	1941	265.6	85.3	0.74
Tilili.3377	6	1370	257.4	85.3	0.74
Tilili.3376	7	1371	255.7	85.3	0.74
Tilili.703	8	257	255.7	63.7	0.83
Tilili.3528	9	1054	254.2	85.3	0.74
Tilili.3527	10	1055	253.2	85.3	0.74

8.3 Tilili Egg Weight Analysis

# Load and process egg weight data
df_egg_weight_raw <- tryCatch({
  read_csv(egg_weight_file, show_col_types = FALSE) %>%
    clean_names()
}, error = function(e) {
  data.frame()
})

if (nrow(df_egg_weight_raw) > 0) {
  week_cols_ew <- names(df_egg_weight_raw)[grepl("^week_", names(df_egg_weight_raw))]
  
  df_egg_weight_long <- df_egg_weight_raw %>%
    select(bird_id = chicken_id, sire_id, dam_id, generation = generation3, batch, pen, all_of(week_cols_ew)) %>%
    pivot_longer(cols = all_of(week_cols_ew), names_to = "week", values_to = "egg_weight") %>%
    mutate(
      week = as.numeric(str_extract(week, "\\d+")),
      egg_weight = na_if(egg_weight, 0),
      egg_weight = if_else(!is.na(egg_weight) & egg_weight > 100, NA_real_, egg_weight),
      generation = factor(generation, levels = sort(unique(generation)), labels = paste0("G", sort(unique(generation)))),
      batch = factor(batch)
    )
  
  n_birds_ew <- n_distinct(df_egg_weight_long$bird_id)
  n_records_ew <- sum(!is.na(df_egg_weight_long$egg_weight))
  mean_ew <- round(mean(df_egg_weight_long$egg_weight, na.rm = TRUE), 1)
  sd_ew <- round(sd(df_egg_weight_long$egg_weight, na.rm = TRUE), 1)
  n_gen_ew <- n_distinct(df_egg_weight_long$generation)
} else {
  n_birds_ew <- 0; n_records_ew <- 0; mean_ew <- NA; sd_ew <- NA; n_gen_ew <- 0
  df_egg_weight_long <- data.frame()
}

170

Birds

4,389

Valid Records

53.1g

Mean Weight

Generations

31-61

Week Range

8.3.1 Weekly Means by Generation

if (nrow(df_egg_weight_long) > 0) {
  weekly_gen_ew <- df_egg_weight_long %>%
    group_by(generation, week) %>%
    summarise(n = sum(!is.na(egg_weight)), mean = mean(egg_weight, na.rm = TRUE),
              sd = sd(egg_weight, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop") %>%
    filter(n > 0)
} else {
  weekly_gen_ew <- data.frame()
}

if (nrow(weekly_gen_ew) > 0) {
  ggplot(weekly_gen_ew, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2.0) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.3, alpha = 0.5) +
    scale_color_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 2)) +
    labs(title = "Tilili: Mean Egg Weight by Week of Age",
         subtitle = "Comparison by Generation | Error bars: ± 1 SE",
         x = "Week of Age", y = "Mean Egg Weight (g)", color = "Generation",
         caption = "Note: Zeros treated as missing | Outliers (>100g) removed") +
    theme_tilili()
}

8.3.2 Overall Generation Summary - Egg Weight

if (nrow(df_egg_weight_long) > 0) {
  df_egg_weight_long %>%
    group_by(generation) %>%
    summarise(n_birds = n_distinct(bird_id), n_records = sum(!is.na(egg_weight)),
              mean_egg_weight = round(mean(egg_weight, na.rm = TRUE), 1),
              sd_egg_weight = round(sd(egg_weight, na.rm = TRUE), 1),
              min_egg_weight = round(min(egg_weight, na.rm = TRUE), 1),
              max_egg_weight = round(max(egg_weight, na.rm = TRUE), 1), .groups = "drop") %>%
    kable(col.names = c("Generation", "N Birds", "N Records", "Mean (g)", "SD (g)", "Min (g)", "Max (g)"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}

Generation	N Birds	N Records	Mean (g)	SD (g)	Min (g)	Max (g)
G3	85	2069	52.3	4.6	39	69
G4	85	2320	53.8	4.6	35	74

8.3.3 Weekly Means by Batch - Egg Weight

if (nrow(df_egg_weight_long) > 0) {
  weekly_batch_ew <- df_egg_weight_long %>%
    group_by(batch, week) %>%
    summarise(n = sum(!is.na(egg_weight)), mean = mean(egg_weight, na.rm = TRUE), .groups = "drop") %>%
    filter(n > 0)
  
  ggplot(weekly_batch_ew, aes(x = week, y = mean, color = batch, group = batch)) +
    geom_line(linewidth = 0.8) +
    geom_point(size = 1.5, alpha = 0.7) +
    scale_color_manual(values = batch_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Mean Egg Weight by Week of Age",
         subtitle = "Comparison by Batch",
         x = "Week of Age", y = "Mean Egg Weight (g)", color = "Batch") +
    theme_tilili()
}

8.4 Tilili Egg Number Analysis

# Load and process egg number data
# Define the egg number file paths explicitly
egg_num_merged <- file.path(wombat_base, "Tililieggnum", "Merged_egg.csv")
egg_num_master <- file.path(wombat_base, "Tililieggnum", "Masterfile_egg.csv")

# Load egg number data - try Merged_egg first (it's actually ODS format despite .csv extension)
df_egg_number_raw <- NULL

# First, try to read Merged_egg.csv as ODS (it's actually in ODS format)
if (file.exists(egg_num_merged)) {
  df_egg_number_raw <- tryCatch({
    temp_data <- read_ods(egg_num_merged, sheet = 1, col_names = TRUE)
    if (nrow(temp_data) > 0) {
      temp_data %>% clean_names()
    } else {
      NULL
    }
  }, error = function(e) {
    NULL
  })
}

# If Merged_egg failed or empty, use Masterfile_egg.csv
if (is.null(df_egg_number_raw) || nrow(df_egg_number_raw) == 0) {
  if (file.exists(egg_num_master)) {
    df_egg_number_raw <- tryCatch({
      read_csv(egg_num_master, show_col_types = FALSE) %>% clean_names()
    }, error = function(e) {
      tryCatch({
        read.csv(egg_num_master, stringsAsFactors = FALSE) %>% clean_names()
      }, error = function(e2) {
        data.frame()
      })
    })
  }
}

# Process the data
if (!is.null(df_egg_number_raw) && nrow(df_egg_number_raw) > 0) {
  # Get week columns
  week_cols_en <- names(df_egg_number_raw)[grepl("^week_", names(df_egg_number_raw))]
  
  # Find bird_id column - check multiple possible names
  possible_id_cols <- c("birds_id", "bird_id", "chicken_id", "chickenid", "id")
  bird_id_col <- NULL
  for (col in possible_id_cols) {
    if (col %in% names(df_egg_number_raw)) {
      bird_id_col <- col
      break
    }
  }
  
  if (!is.null(bird_id_col) && length(week_cols_en) > 0) {
    # Convert week columns to numeric
    df_egg_number_raw <- df_egg_number_raw %>%
      mutate(across(all_of(week_cols_en), ~as.numeric(as.character(.))))
    
    # Select available columns
    core_cols <- c(bird_id_col, "generation", "batch")
    optional_cols <- c("sire_id", "dam_id", "pen")
    available_core <- intersect(core_cols, names(df_egg_number_raw))
    available_optional <- intersect(optional_cols, names(df_egg_number_raw))
    all_select_cols <- c(available_core, available_optional, week_cols_en)
    
    df_egg_number_long <- df_egg_number_raw %>%
      select(all_of(all_select_cols)) %>%
      rename(bird_id = all_of(bird_id_col)) %>%
      pivot_longer(cols = all_of(week_cols_en), names_to = "week", values_to = "egg_number") %>%
      mutate(
        week = as.numeric(str_extract(week, "\\d+")),
        egg_number = na_if(egg_number, 0),
        generation = factor(generation, levels = sort(unique(generation)), labels = paste0("G", sort(unique(generation)))),
        batch = factor(batch)
      )
    
    n_birds_en <- n_distinct(df_egg_number_long$bird_id)
    n_records_en <- sum(!is.na(df_egg_number_long$egg_number))
    mean_en <- round(mean(df_egg_number_long$egg_number, na.rm = TRUE), 2)
    sd_en <- round(sd(df_egg_number_long$egg_number, na.rm = TRUE), 2)
    n_gen_en <- n_distinct(df_egg_number_long$generation)
    
    cat("Egg number data loaded:", n_birds_en, "birds,", n_records_en, "records\n")
  } else {
    cat("Warning: Could not find bird_id column or week columns in egg number data\n")
    cat("Available columns:", paste(names(df_egg_number_raw), collapse = ", "), "\n")
    n_birds_en <- 0; n_records_en <- 0; mean_en <- NA; sd_en <- NA; n_gen_en <- 0
    df_egg_number_long <- data.frame()
  }
} else {
  cat("Warning: Could not load egg number data from either file\n")
  n_birds_en <- 0; n_records_en <- 0; mean_en <- NA; sd_en <- NA; n_gen_en <- 0
  df_egg_number_long <- data.frame()
}

## Egg number data loaded: 180 birds, 4892 records

180

Birds

4,892

Valid Records

4.33

Mean Eggs/Week

Generations

31-61

Week Range

8.4.1 Weekly Means by Generation

if (nrow(df_egg_number_long) > 0) {
  weekly_gen_en <- df_egg_number_long %>%
    group_by(generation, week) %>%
    summarise(n = sum(!is.na(egg_number)), mean = mean(egg_number, na.rm = TRUE),
              sd = sd(egg_number, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop") %>%
    filter(n > 0)
} else {
  weekly_gen_en <- data.frame()
}

if (nrow(weekly_gen_en) > 0) {
  ggplot(weekly_gen_en, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2.0) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.3, alpha = 0.5) +
    scale_color_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 2)) +
    labs(title = "Tilili: Mean Egg Number by Week of Age",
         subtitle = "Comparison by Generation | Error bars: ± 1 SE",
         x = "Week of Age", y = "Mean Egg Number (eggs/week)", color = "Generation",
         caption = "Note: Zeros treated as missing") +
    theme_tilili()
}

8.4.2 Overall Generation Summary - Egg Number

if (nrow(df_egg_number_long) > 0) {
  df_egg_number_long %>%
    group_by(generation) %>%
    summarise(n_birds = n_distinct(bird_id), n_records = sum(!is.na(egg_number)),
              mean_egg_number = round(mean(egg_number, na.rm = TRUE), 2),
              sd_egg_number = round(sd(egg_number, na.rm = TRUE), 2),
              min_egg_number = round(min(egg_number, na.rm = TRUE), 2),
              max_egg_number = round(max(egg_number, na.rm = TRUE), 2), .groups = "drop") %>%
    kable(col.names = c("Generation", "N Birds", "N Records", "Mean", "SD", "Min", "Max"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}

Generation	N Birds	N Records	Mean	SD	Min	Max
G3	90	2228	4.37	1.67	1.0	7
G4	90	2664	4.29	1.67	0.2	7

8.4.3 Weekly Means by Batch - Egg Number

if (nrow(df_egg_number_long) > 0) {
  weekly_batch_en <- df_egg_number_long %>%
    group_by(batch, week) %>%
    summarise(n = sum(!is.na(egg_number)), mean = mean(egg_number, na.rm = TRUE), .groups = "drop") %>%
    filter(n > 0)
  
  ggplot(weekly_batch_en, aes(x = week, y = mean, color = batch, group = batch)) +
    geom_line(linewidth = 0.8) +
    geom_point(size = 1.5, alpha = 0.7) +
    scale_color_manual(values = batch_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Mean Egg Number by Week of Age",
         subtitle = "Comparison by Batch",
         x = "Week of Age", y = "Mean Egg Number (eggs/week)", color = "Batch") +
    theme_tilili()
}

8.5 Tilili Combined Trait Summary

if (nrow(weekly_gen_ew) > 0 && nrow(weekly_gen_en) > 0) {
  combined_weekly <- bind_rows(
    weekly_gen_ew %>% select(generation, week, mean, se) %>% mutate(trait = "Egg Weight (g)"),
    weekly_gen_en %>% select(generation, week, mean, se) %>% mutate(trait = "Egg Number")
  )
  
  ggplot(combined_weekly, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 1.5) +
    geom_ribbon(aes(ymin = mean - se, ymax = mean + se, fill = generation), alpha = 0.2, color = NA) +
    facet_wrap(~ trait, scales = "free_y", ncol = 1) +
    scale_color_manual(values = gen_colors) +
    scale_fill_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Egg Production Traits by Week of Age",
         subtitle = "Generation Comparison | Shaded area: ± 1 SE",
         x = "Week of Age", y = "Mean Value", color = "Generation", fill = "Generation") +
    theme_tilili() +
    theme(strip.text = element_text(face = "bold", size = 12))
}

8.5.1 Overall Trait Summary Table

data.frame(
  Trait = c("Body Weight (16 wk)", "Egg Weight (g)", "Egg Number"),
  N_Birds = c(n_with_bw_tilili, n_birds_ew, n_birds_en),
  N_Records = c(n_with_bw_tilili, n_records_ew, n_records_en),
  Overall_Mean = c(mean_bw_tilili, mean_ew, mean_en),
  Overall_SD = c(sd_bw_tilili, sd_ew, sd_en),
  Heritability = c(ifelse(!is.na(h2_tilili), h2_tilili, "N/A"), "TBD", "TBD")
) %>%
  kable(col.names = c("Trait", "N Birds", "N Records", "Mean", "SD", "h²"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")

Trait	N Birds	N Records	Mean	SD	h²
Body Weight (16 wk)	3783	3783	1073.00	303.00	0.196
Egg Weight (g)	170	4389	53.10	4.60	TBD
Egg Number	180	4892	4.33	1.67	TBD

9 Limitations

⚠️ Important Limitations to Consider

Small sample sizes: Population sizes range from 662 to 5906 animals per line, which limits statistical power.
Single trait analysis for most lines: Only body weight at 16 weeks is analyzed for all lines. Tilili additionally includes egg production traits.
Limited pedigree depth: Only a few generations. G0 founders have unknown parents.
Limited relatedness: Small number of sires and dams creates unbalanced family structures.
No genomic data: Pedigree-based relationships may not capture actual genetic relationships.
NHIL-20 heritability concern: The high h² (0.76) may be inflated due to population substructure (2 subpopulations detected).
Mating ratio variability: While targets are 1:4 (ILRI lines) and 1:10 (Tilili), actual observed ratios may vary by batch and generation.

10 Conclusions and Recommendations

10.1 Key Findings

✅ Summary of Results

Measure	Finding
Total birds evaluated	9,696 across 5 lines
Lines analyzed	LIL-20, BIL-20, NHIL-20, WIL-20, Tilili
Heritability range	0.196 to 0.76
Target mating ratio	1:4 (ILRI lines) / 1:10 (Tilili)
Genetic trend	Positive for most lines
NHIL-20 subpopulations	2 distinct clusters detected

Tilili-Specific Findings:

Trait	Value
Body Weight (16 wk)	Mean: 1073g (SD: 303g, CV: 28.2%)
Heritability	0.196 ± 0.037
Egg Weight	Mean: 53.1g across 4389 records
Egg Number	Mean: 4.33 eggs/week across 4892 records

10.2 Recommended Actions

📋 Priority Actions

For All Lines:

Maintain target mating ratios: 1:4 for ILRI lines (LIL-20, BIL-20, NHIL-20, WIL-20) and 1:10 for Tilili to match breeding objectives
Monitor inbreeding levels: Keep mean F below 12.5% (half-sib level) and avoid matings that would produce offspring with F ≥ 25%
Continue data collection: More phenotypic records are needed to improve precision of genetic parameter estimates
Establish cohort-based evaluation schedule: Conduct genetic evaluations per batch after BW_16wk data is recorded
Investigate NHIL-20: Further analysis of the 2 subpopulations — review breeding records and consider stratified analysis

For Tilili:

Estimate genetic parameters for egg traits: Run WOMBAT repeatability model analysis for egg weight and egg number
Develop selection index: Combine body weight (h² = 0.196) with egg production traits for balanced genetic improvement
Continue egg production recording: Expand to G5 birds as they reach laying age
Use top EBV animals: Select top breeding candidates identified with highest EBVs for body weight

General Recommendations:

Multi-trait selection index: Develop comprehensive index as data allows
Consider genomic tools: SNP genotyping for improved relationship estimation and genomic selection
Standardize ID systems: Ensure consistent identification across data collection and evaluation

10.3 Future Direction: Optimal Mate Allocation with MateSelect

🧬 MateSelect: Balancing Genetic Gain and Inbreeding Control

MateSelect is a specialized software tool designed for optimal mate allocation in livestock breeding programs. It uses advanced optimization algorithms to identify the best mating combinations that maximize genetic progress while constraining the rate of inbreeding accumulation.

10.3.1 Why MateSelect?

Current challenges in the ILRI poultry breeding program:

Limited number of sires: With 1:4 (ILRI) and 1:10 (Tilili) mating ratios, careful sire allocation is critical
Inbreeding accumulation: Some lines show F > 10% in later generations
Suboptimal mate pairing: Random or intuitive mating decisions may not optimize genetic gain
Multiple trait goals: Balancing body weight, egg production, and adaptability requires sophisticated planning

10.3.2 Key Benefits

Benefit	Impact
Maximize genetic gain	10-20% higher response to selection compared to random mating
Control inbreeding	Keep ΔF < 1% per generation to preserve genetic diversity
Optimal contribution	Balance individual EBVs with their relatedness to the population
Long-term sustainability	Maintain effective population size (Ne) for future generations

💻 How MateSelect Works

MateSelect uses Optimal Contribution Selection (OCS) theory to solve a constrained optimization problem:

Objective: Maximize genetic gain = Σ cᵢ × EBVᵢ

Subject to: Rate of inbreeding ΔF ≤ target (e.g., 0.5% or 1%)

Where: - cᵢ = contribution (proportion of offspring) from animal i - EBVᵢ = Estimated Breeding Value of animal i - ΔF = (Fₜ₊₁ - Fₜ) / (1 - Fₜ) = rate of inbreeding increase

The software determines:

Which animals to select as parents for the next generation
How many offspring each parent should contribute
Which specific matings to perform to minimize relationship between mates

📊 Expected Outcomes from Implementing MateSelect

Current Approach	With MateSelect
Random/intuitive mating	Optimized mate allocation
Uncontrolled inbreeding accumulation	ΔF constrained to < 1% per generation
Variable genetic gain	Maximized genetic response
Risk of genetic bottleneck	Maintained effective population size
Single-trait focus	Multi-trait optimization possible

Projected improvements:

Genetic gain: 15-25% higher genetic progress per generation
Inbreeding rate: Reduced by 30-50% compared to truncation selection
Long-term Ne: Maintained above critical threshold (Ne > 50)

🚀 Implementation Roadmap for MateSelect

Phase 1: Data Preparation

Export EBVs for all selection candidates from WOMBAT
Generate pedigree-based relationship matrix (A-matrix)
Define breeding objectives and trait weightings

Phase 2: Software Setup

Install MateSelect or equivalent OCS software (e.g., EVA, GENCONT)
Configure constraints: target ΔF, minimum/maximum contributions per animal
Set mating constraints: avoid full-sib matings, limit half-sib matings

Phase 3: Mate Allocation

Run optimization for each breeding cycle
Generate mate lists with specific sire × dam combinations
Implement matings according to optimal allocations

Phase 4: Monitoring & Feedback

Track realized inbreeding vs. predicted
Compare actual genetic gain to projections
Refine constraints and weightings based on outcomes

Timeline: Pilot implementation in 1-2 lines within the next breeding cycle, followed by program-wide adoption.

⚠️ Important Considerations

Data quality: MateSelect requires accurate EBVs and complete pedigrees — data cleaning is essential
Computational resources: Large populations may require significant processing time
Practical constraints: Account for housing, availability, and reproductive capacity of selected animals
Genetic diversity markers: Consider supplementing pedigree with genomic data for more accurate relationship estimation
Training: Staff training on software operation and interpretation of results

🐔 ILRI Poultry Genetics Program

International Livestock Research Institute (ILRI)

Report Generated: January 16, 2026 at 14:39

Analyst: Setegn Worku Alemu, PhD | Quantitative Geneticist

Contact: S.W.Alemu@cgiar.org

📍 Addis Ababa, Ethiopia | Nairobi, Kenya

“Advancing poultry genetics for smallholder farmers across Africa”

Combined Report with Mating Ratios (1:4 ILRI, 1:10 Tilili) & Enhanced Inbreeding Analysis

Includes In-Depth Tilili Analysis with Egg Production Traits

Poultry Breeding Program Evaluation

Combined Summary Report: LIL-20, BIL-20, NHIL-20, WIL-20, and Tilili Lines with In-Depth Tilili Analysis

Setegn Worku Alemu, PhD | Quantitative Geneticist

January 16, 2026

1 Executive Summary

📋 Internal Audit Purpose

2 Breed Management Context

2.0.1 Tilili Line Semi-Scavenging System

2.0.2 ILRI Lines Conventional Housing

📋 Line-by-Line Housing and Mating Summary

3 Mating Ratio Analysis

🐓 Mating Ratios by Breeding Line

3.1 Observed Mating Ratios by Line and Generation

⚠️ Mating Ratio Deviations

4 Phenotypic Summary

4.1 Population Structure

4.2 Body Weight at 16 Weeks

4.3 Phenotypic Trend

4.4 Sexual Dimorphism

5 Enhanced Inbreeding Analysis

🧬 Inbreeding Coefficient (F) Analysis

5.1 Detailed Inbreeding Analysis by Line and Generation

5.2 Inbreeding Summary by Line

5.3 Population Inbreeding Distribution by Line

5.3.1 BIL-20 - Inbreeding Distribution

5.3.2 LIL-20 - Inbreeding Distribution

5.3.3 NHIL-20 - Inbreeding Distribution

5.3.4 Tilili - Inbreeding Distribution

5.3.5 WIL-20 - Inbreeding Distribution

5.4 Inbreeding Coefficient Distribution by Line

📋 Inbreeding Management Guidelines

⚠️ Inbreeding Management Recommendations

6 Genetic Analysis

6.1 Variance Components and Heritability

ℹ️ Analysis Method

6.2 Breeding Structure Analysis

6.2.1 Number of Sires and Dams by Generation

6.2.2 Sire EBV Summary by Generation

6.3 EBV Summary Statistics

6.4 Genetic Trend

6.5 Top Breeding Animals

7 NHIL-20 Detailed Analysis

🔍 NHIL-20 Requires Further Investigation

7.1 NHIL-20 Breeding Structure

7.2 NHIL-20 EBV Distribution by Generation

7.3 NHIL-20 Population Substructure Detection (Gaussian Mixture Model)

🔍 GMM Analysis Conclusion for NHIL-20

8 Tilili In-Depth Analysis

🐔 About the Tilili Breeding Program

8.1 Housing and Feed Management Protocol

📍 Three-Phase Breeding Protocol

8.1.1 Housing Management Summary

8.1.2 Feed Intake Summary (Weeks 1-16)

8.1.3 Feed Allowance by Week

📊 Feed Conversion and Growth Efficiency

✅ Key Feed Observations

8.2 Tilili Body Weight Genetic Parameters

8.2.1 Phenotypic Summary

8.2.2 Population Structure by Generation

8.2.3 Variance Components and Heritability

8.2.4 Tilili EBV Summary

8.2.5 Top 10 Tilili Breeding Animals (by EBV)

8.3 Tilili Egg Weight Analysis

8.3.1 Weekly Means by Generation

8.3.2 Overall Generation Summary - Egg Weight

8.3.3 Weekly Means by Batch - Egg Weight

8.4 Tilili Egg Number Analysis

8.4.1 Weekly Means by Generation

8.4.2 Overall Generation Summary - Egg Number

8.4.3 Weekly Means by Batch - Egg Number

8.5 Tilili Combined Trait Summary

8.5.1 Overall Trait Summary Table

9 Limitations

⚠️ Important Limitations to Consider

10 Conclusions and Recommendations

10.1 Key Findings

✅ Summary of Results

10.2 Recommended Actions

📋 Priority Actions

10.3 Future Direction: Optimal Mate Allocation with MateSelect