library(readxl)
library(tidyverse)
library(janitor)
library(knitr)
library(kableExtra)
library(scales)
library(gridExtra)

if (!require(mclust, quietly = TRUE)) {
  install.packages("mclust", quiet = TRUE)
}
library(mclust)

if (!require(readODS, quietly = TRUE)) {
  install.packages("readODS", repos = "https://cloud.r-project.org", quiet = TRUE)
}
library(readODS)

# Custom theme for all lines
theme_report <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#1a365d"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}

# Tilili-specific theme (Teal palette)
theme_tilili <- function() {
  theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#234e52"),
      plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold", size = 11),
      strip.text = element_text(face = "bold", size = 11)
    )
}
theme_set(theme_report())

line_colors <- c("LIL-20" = "#3182ce", "BIL-20" = "#38a169", 
                 "NHIL-20" = "#805ad5", "WIL-20" = "#ed8936", "Tilili" = "#e53e3e")

# COLORBLIND-FRIENDLY PALETTE for Tilili generations
gen_colors <- c("G3" = "#0077BB", "G4" = "#EE7733", "G5" = "#009988")
batch_colors <- c("0" = "#0077BB", "3" = "#33BBEE", "4" = "#009988", 
                  "5" = "#EE7733", "6" = "#CC3311", "7" = "#EE3377")
working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Poultryaudit/Dataset"
setwd(working_dir)

pheno_file_3lines <- "Additional_Poultry dataset_audit.xlsx"
pheno_file_wil <- "Dataset_evaluation_corrected.xlsx"
tilili_file <- "AllG-345.csv"

wombat_base <- "Analysis_with_wombat2"
model_dir <- "Model1_Animal_YearMonth"

# Tilili-specific paths for egg data
tilili_wombat_dir <- file.path(wombat_base, "Tilili", "Model1_Animal_YearMonth")
egg_weight_file <- file.path(wombat_base, "TililiAEW", "Masterfile_AEW.csv")
merged_egg_file <- file.path(wombat_base, "Tililieggnum", "Merged_egg.csv")
old_egg_file <- file.path(wombat_base, "Tililieggnum", "Masterfile_egg.csv")

# Determine egg number file
if (file.exists(merged_egg_file)) {
  tryCatch({
    test_read <- read_ods(merged_egg_file, sheet = 1, col_names = TRUE)
    if (nrow(test_read) > 0) {
      egg_number_file <- merged_egg_file
    } else {
      egg_number_file <- old_egg_file
    }
  }, error = function(e) {
    tryCatch({
      test_read <- read_csv(merged_egg_file, show_col_types = FALSE)
      if (nrow(test_read) > 0) {
        egg_number_file <- merged_egg_file
      } else {
        egg_number_file <- old_egg_file
      }
    }, error = function(e2) {
      egg_number_file <- old_egg_file
    })
  })
} else {
  egg_number_file <- old_egg_file
}
clean_names_custom <- function(df) {
  colnames(df) <- gsub(" ", "_", colnames(df))
  return(df)
}

convert_types <- function(df) {
  df$Generation <- as.character(df$Generation)
  if ("Batch" %in% colnames(df)) df$Batch <- as.character(df$Batch)
  df$ID <- as.character(df$ID)
  if ("Sire_ID" %in% colnames(df)) df$Sire_ID <- as.character(df$Sire_ID)
  if ("Dam_ID" %in% colnames(df)) df$Dam_ID <- as.character(df$Dam_ID)
  return(df)
}

lil_data <- read_excel(pheno_file_3lines, sheet = "LIL") %>% clean_names_custom() %>% convert_types()
bil_data <- read_excel(pheno_file_3lines, sheet = "BIL") %>% clean_names_custom() %>% convert_types()
nhil_data <- read_excel(pheno_file_3lines, sheet = "NHIL") %>% clean_names_custom() %>% convert_types()
wil_data <- read_excel(pheno_file_wil) %>% clean_names_custom() %>% convert_types()

tilili_raw <- read.csv(tilili_file, stringsAsFactors = FALSE, fileEncoding = "Latin1")
colnames(tilili_raw) <- trimws(colnames(tilili_raw))

tilili_data <- data.frame(
  ID = as.character(tilili_raw$ChickenID),
  Sire_ID = as.character(tilili_raw$SireID),
  Dam_ID = as.character(tilili_raw$DamId),
  Sex = as.character(tilili_raw$Sex),
  Batch = as.character(tilili_raw$Batch),
  Generation = as.character(tilili_raw$Generation),
  BW_16wk = as.numeric(tilili_raw$Week16),
  stringsAsFactors = FALSE
)

tilili_data$Sire_ID[tilili_data$Sire_ID %in% c("", "0", "NA", "na") | is.na(tilili_data$Sire_ID)] <- NA
tilili_data$Dam_ID[tilili_data$Dam_ID %in% c("", "0", "NA", "na") | is.na(tilili_data$Dam_ID)] <- NA

lil_data$Line <- "LIL-20"
bil_data$Line <- "BIL-20"
nhil_data$Line <- "NHIL-20"
wil_data$Line <- "WIL-20"
tilili_data$Line <- "Tilili"
lines <- c("LIL", "BIL", "NHIL", "WIL", "Tilili")
all_vc <- list()
all_ebv <- list()

extract_wombat <- function(line_code) {
  sum_est_file <- file.path(wombat_base, line_code, model_dir, "SumEstimates.out")
  ebv_file <- file.path(wombat_base, line_code, model_dir, "RnSoln_animal.dat")
  id_map_file <- file.path(wombat_base, line_code, "ID_mapping.csv")
  
  result <- list(vc = NULL, ebv = NULL)
  
  if (!file.exists(sum_est_file)) return(result)
  
  line_label <- if (line_code == "Tilili") "Tilili" else paste0(line_code, "-20")
  
  sum_lines <- readLines(sum_est_file)
  Va <- NA; Va_SE <- NA; Ve <- NA; Ve_SE <- NA; Vp <- NA; h2 <- NA; h2_SE <- NA; N_rec <- NA
  
  covs_a_idx <- grep("COVS A 1 1.*vrat", sum_lines)
  if (length(covs_a_idx) > 0) {
    covs_a_line <- sum_lines[covs_a_idx[1]]
    nums <- as.numeric(unlist(regmatches(covs_a_line, gregexpr("[0-9]+\\.?[0-9]*", covs_a_line))))
    if (length(nums) >= 7) { Va <- nums[4]; Va_SE <- nums[5]; h2 <- nums[6]; h2_SE <- nums[7] }
  }
  
  covs_z_idx <- grep("COVS Z 1 1.*vrat", sum_lines)
  if (length(covs_z_idx) > 0) {
    covs_z_line <- sum_lines[covs_z_idx[1]]
    nums_z <- as.numeric(unlist(regmatches(covs_z_line, gregexpr("[0-9]+\\.?[0-9]*", covs_z_line))))
    if (length(nums_z) >= 5) { Ve <- nums_z[4]; Ve_SE <- nums_z[5] }
  }
  
  covs_t_idx <- grep("COVS T 1 1", sum_lines)
  if (length(covs_t_idx) > 0) {
    covs_t_line <- sum_lines[covs_t_idx[1]]
    nums_t <- as.numeric(unlist(regmatches(covs_t_line, gregexpr("[0-9]+\\.?[0-9]*", covs_t_line))))
    if (length(nums_t) >= 5) { Vp <- nums_t[4] }
  }
  
  rec_idx <- grep("No. of records", sum_lines)
  if (length(rec_idx) > 0) {
    rec_line <- sum_lines[rec_idx[1]]
    nums_rec <- as.numeric(unlist(regmatches(rec_line, gregexpr("[0-9]+", rec_line))))
    N_rec <- nums_rec[1]
  }
  
  if (!is.na(h2)) {
    result$vc <- data.frame(
      Line = line_label, N = ifelse(!is.na(N_rec), N_rec, NA),
      Va = ifelse(!is.na(Va), round(Va, 1), NA), Va_SE = ifelse(!is.na(Va_SE), round(Va_SE, 1), NA),
      Ve = ifelse(!is.na(Ve), round(Ve, 1), NA), Ve_SE = ifelse(!is.na(Ve_SE), round(Ve_SE, 1), NA),
      Vp = ifelse(!is.na(Vp), round(Vp, 1), NA), h2 = round(h2, 3), h2_SE = round(h2_SE, 3)
    )
  }
  
  if (file.exists(ebv_file) && file.exists(id_map_file)) {
    ebv_lines <- readLines(ebv_file)
    header_idx <- grep("Run_No", ebv_lines)
    
    if (length(header_idx) > 0) {
      data_start <- header_idx[1] + 1
      data_lines <- ebv_lines[data_start:length(ebv_lines)]
      data_lines <- data_lines[nchar(trimws(data_lines)) > 0]
      
      ebv_data <- read.table(text = data_lines, header = FALSE, fill = TRUE)
      
      if (ncol(ebv_data) >= 7) {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "SE_EBV", "Accuracy", "Inbreeding_pct")
      } else {
        colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "Inbreeding_pct")[1:ncol(ebv_data)]
      }
      
      id_map <- read.csv(id_map_file, stringsAsFactors = FALSE)
      ebv_merged <- merge(ebv_data, id_map, by.x = "ID_num", by.y = "ID_num", all.x = TRUE)
      
      result$ebv <- data.frame(
        Line = line_label, ID_original = as.character(ebv_merged$ID_orig),
        ID_wombat = ebv_merged$ID_num, EBV = ebv_merged$EBV,
        SE_EBV = if ("SE_EBV" %in% colnames(ebv_merged)) ebv_merged$SE_EBV else NA,
        Accuracy = if ("Accuracy" %in% colnames(ebv_merged)) ebv_merged$Accuracy else NA,
        Inbreeding_pct = if ("Inbreeding_pct" %in% colnames(ebv_merged)) ebv_merged$Inbreeding_pct else NA
      )
    }
  }
  
  return(result)
}

for (ln in lines) {
  res <- extract_wombat(ln)
  if (!is.null(res$vc)) all_vc[[ln]] <- res$vc
  if (!is.null(res$ebv)) all_ebv[[ln]] <- res$ebv
}

vc_summary <- do.call(rbind, all_vc)
ebv_all <- do.call(rbind, all_ebv)
compute_pheno_stats <- function(data, line_name) {
  total <- nrow(data)
  g0 <- sum(data$Generation == "0", na.rm = TRUE)
  g1 <- sum(data$Generation == "1", na.rm = TRUE)
  g2 <- sum(data$Generation == "2", na.rm = TRUE)
  g3 <- sum(data$Generation == "3", na.rm = TRUE)
  g4 <- sum(data$Generation == "4", na.rm = TRUE)
  g5 <- sum(data$Generation == "5", na.rm = TRUE)
  
  if ("Cull_Reason" %in% colnames(data)) {
    deaths <- sum(!is.na(data$Cull_Reason) & data$Cull_Reason != "", na.rm = TRUE)
  } else if ("Reason" %in% colnames(data)) {
    deaths <- sum(!is.na(data$Reason) & data$Reason != "", na.rm = TRUE)
  } else { deaths <- 0 }
  survival <- round((1 - deaths / total) * 100, 1)
  
  bw16_n <- sum(!is.na(data$BW_16wk))
  bw16_mean <- round(mean(data$BW_16wk, na.rm = TRUE), 0)
  bw16_sd <- round(sd(data$BW_16wk, na.rm = TRUE), 0)
  bw16_min <- round(min(data$BW_16wk, na.rm = TRUE), 0)
  bw16_max <- round(max(data$BW_16wk, na.rm = TRUE), 0)
  bw16_cv <- round(bw16_sd / bw16_mean * 100, 1)
  
  male_bw <- data %>% filter(Sex == "M") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  female_bw <- data %>% filter(Sex == "F") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
  dimorphism <- round((male_bw - female_bw) / female_bw * 100, 1)
  
  afe_col <- colnames(data)[grepl("AFE", colnames(data), ignore.case = TRUE)][1]
  egg_n <- if (!is.na(afe_col)) sum(!is.na(data[[afe_col]])) else 0
  n_females <- sum(data$Sex == "F", na.rm = TRUE)
  egg_pct <- ifelse(n_females > 0, round(egg_n / n_females * 100, 1), 0)
  
  data.frame(
    Line = line_name, Total = total, G0 = g0, G1 = g1, G2 = g2, G3 = g3, G4 = g4, G5 = g5,
    Survival_Pct = survival, BW16_N = bw16_n, BW16_Mean = bw16_mean, BW16_SD = bw16_sd,
    BW16_Min = bw16_min, BW16_Max = bw16_max, BW16_CV = bw16_cv,
    Male_BW16 = male_bw, Female_BW16 = female_bw, Dimorphism_Pct = dimorphism,
    Egg_N = egg_n, Egg_Pct_Females = egg_pct
  )
}

pheno_stats <- bind_rows(
  compute_pheno_stats(lil_data, "LIL-20"),
  compute_pheno_stats(bil_data, "BIL-20"),
  compute_pheno_stats(nhil_data, "NHIL-20"),
  compute_pheno_stats(wil_data, "WIL-20"),
  compute_pheno_stats(tilili_data, "Tilili")
)

ebv_stats <- ebv_all %>%
  group_by(Line) %>%
  summarise(N = n(), Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
            SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
            Min_EBV = round(min(EBV, na.rm = TRUE), 1),
            Max_EBV = round(max(EBV, na.rm = TRUE), 1),
            Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
            N_Inbred = sum(Inbreeding_pct > 0, na.rm = TRUE), .groups = "drop")

total_birds <- sum(pheno_stats$Total)
compute_parents_by_gen <- function(data, line_name) {
  sire_by_gen <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(N_Sires = n_distinct(Sire_ID), .groups = "drop")
  
  dam_by_gen <- data %>%
    filter(!is.na(Dam_ID) & Dam_ID != "" & Dam_ID != "0" & Dam_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(N_Dams = n_distinct(Dam_ID), .groups = "drop")
  
  full_join(sire_by_gen, dam_by_gen, by = "Generation") %>%
    mutate(Line = line_name, Generation = paste0("G", Generation)) %>%
    select(Line, Generation, N_Sires, N_Dams)
}

# Enhanced inbreeding calculation function with robust metrics
compute_inbreeding_detailed <- function(ebv_data, pheno_data, line_name) {
  ebv_with_gen <- ebv_data %>%
    filter(Line == line_name) %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(pheno_data %>% select(ID, Generation) %>% mutate(ID = as.character(ID)),
              by = c("ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  if (nrow(ebv_with_gen) == 0) return(NULL)
  
  ebv_with_gen %>%
    group_by(Generation) %>%
    summarise(
      N_Total = n(), 
      N_F_gt_0 = sum(Inbreeding_pct > 0, na.rm = TRUE),
      N_F_ge_6.25 = sum(Inbreeding_pct >= 6.25, na.rm = TRUE),
      N_F_ge_12.5 = sum(Inbreeding_pct >= 12.5, na.rm = TRUE),
      N_F_ge_25 = sum(Inbreeding_pct >= 25, na.rm = TRUE),
      Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
      Median_F = round(median(Inbreeding_pct, na.rm = TRUE), 2),
      Max_F = round(max(Inbreeding_pct, na.rm = TRUE), 2),
      SD_F = round(sd(Inbreeding_pct, na.rm = TRUE), 2),
      .groups = "drop"
    ) %>%
    mutate(
      Line = line_name, 
      Generation = paste0("G", Generation),
      Pct_F_gt_0 = round(N_F_gt_0 / N_Total * 100, 1),
      Pct_F_ge_6.25 = round(N_F_ge_6.25 / N_Total * 100, 1),
      Pct_F_ge_12.5 = round(N_F_ge_12.5 / N_Total * 100, 1),
      Pct_F_ge_25 = round(N_F_ge_25 / N_Total * 100, 1),
      # Delta F calculation (rate of inbreeding)
      Delta_F = ifelse(Mean_F > 0, round(Mean_F / 100, 4), NA)
    ) %>%
    select(Line, Generation, N_Total, N_F_gt_0, Pct_F_gt_0, N_F_ge_6.25, Pct_F_ge_6.25,
           N_F_ge_12.5, Pct_F_ge_12.5, N_F_ge_25, Pct_F_ge_25, Mean_F, Median_F, SD_F, Max_F, Delta_F)
}

# Compute mating ratio from pedigree data
compute_mating_ratio <- function(data, line_name) {
  mating_info <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA" &
           !is.na(Dam_ID) & Dam_ID != "" & Dam_ID != "0" & Dam_ID != "NA") %>%
    group_by(Generation) %>%
    summarise(
      N_Offspring = n(),
      N_Sires = n_distinct(Sire_ID),
      N_Dams = n_distinct(Dam_ID),
      .groups = "drop"
    ) %>%
    mutate(
      Line = line_name,
      Generation = paste0("G", Generation),
      Mating_Ratio = round(N_Dams / N_Sires, 1),
      Mating_Ratio_Label = paste0("1:", round(N_Dams / N_Sires, 0))
    ) %>%
    select(Line, Generation, N_Sires, N_Dams, N_Offspring, Mating_Ratio, Mating_Ratio_Label)
  
  return(mating_info)
}

compute_sire_ebv <- function(data, ebv_data, line_name) {
  sires <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    pull(Sire_ID) %>% unique()
  
  sire_ebvs <- ebv_data %>% filter(Line == line_name, ID_original %in% sires)
  
  if (nrow(sire_ebvs) > 0) {
    data.frame(Line = line_name, N_Sires = length(sires), N_Sires_With_EBV = nrow(sire_ebvs),
      Mean_Sire_EBV = round(mean(sire_ebvs$EBV, na.rm = TRUE), 1),
      SD_Sire_EBV = round(sd(sire_ebvs$EBV, na.rm = TRUE), 1),
      Min_Sire_EBV = round(min(sire_ebvs$EBV, na.rm = TRUE), 1),
      Max_Sire_EBV = round(max(sire_ebvs$EBV, na.rm = TRUE), 1))
  } else {
    data.frame(Line = line_name, N_Sires = length(sires), N_Sires_With_EBV = 0,
               Mean_Sire_EBV = NA, SD_Sire_EBV = NA, Min_Sire_EBV = NA, Max_Sire_EBV = NA)
  }
}

compute_sire_ebv_by_gen <- function(data, ebv_data, line_name) {
  offspring_data <- data %>%
    filter(!is.na(Sire_ID) & Sire_ID != "" & Sire_ID != "0" & Sire_ID != "NA") %>%
    select(ID, Sire_ID, Generation) %>%
    mutate(ID = as.character(ID), Sire_ID = as.character(Sire_ID))
  
  if (nrow(offspring_data) == 0) return(NULL)
  
  sire_by_gen <- offspring_data %>%
    group_by(Generation) %>%
    summarise(Sires = list(unique(Sire_ID)), N_Offspring = n(), .groups = "drop")
  
  sire_ebvs <- ebv_data %>%
    filter(Line == line_name) %>%
    select(ID_original, EBV) %>%
    mutate(ID_original = as.character(ID_original))
  
  result <- lapply(1:nrow(sire_by_gen), function(i) {
    gen <- sire_by_gen$Generation[i]
    sires_in_gen <- unlist(sire_by_gen$Sires[i])
    n_offspring <- sire_by_gen$N_Offspring[i]
    
    sire_ebv_gen <- sire_ebvs %>% filter(ID_original %in% sires_in_gen)
    
    if (nrow(sire_ebv_gen) > 0) {
      data.frame(Line = line_name, Generation = paste0("G", gen), N_Sires = length(sires_in_gen),
        N_Sires_With_EBV = nrow(sire_ebv_gen), N_Offspring = n_offspring,
        Mean_Sire_EBV = round(mean(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        SD_Sire_EBV = round(sd(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        Min_Sire_EBV = round(min(sire_ebv_gen$EBV, na.rm = TRUE), 1),
        Max_Sire_EBV = round(max(sire_ebv_gen$EBV, na.rm = TRUE), 1))
    } else {
      data.frame(Line = line_name, Generation = paste0("G", gen), N_Sires = length(sires_in_gen),
        N_Sires_With_EBV = 0, N_Offspring = n_offspring,
        Mean_Sire_EBV = NA, SD_Sire_EBV = NA, Min_Sire_EBV = NA, Max_Sire_EBV = NA)
    }
  })
  
  do.call(rbind, result)
}

1 Executive Summary

This comprehensive report presents descriptive statistics and genetic analysis results for five ILRI poultry lines: LIL-20, BIL-20, NHIL-20, WIL-20, and Tilili (local Ethiopian breed). The evaluation encompasses phenotypic characterization across multiple generations and genetic parameter estimation using WOMBAT software for body weight at 16 weeks, including in-depth Tilili analysis with egg production traits.

9,696
Total Birds
All 5 Lines
1215
LIL-20
797
BIL-20
1116
NHIL-20
662
WIL-20
5906
Tilili
Local Breed

📋 Internal Audit Purpose

This genetic evaluation was initiated for internal audit purposes to assess the current status of ILRI’s poultry breeding program. The analysis establishes baseline genetic parameters and identifies areas for improvement.

Report Sections:

  1. Breed Management Context - Housing systems and environmental factors
  2. Phenotypic Summary - Population structure and body weight analysis
  3. Mating Ratio Analysis - Breeding structure assessment (1:4 for ILRI lines, 1:10 for Tilili)
  4. Enhanced Inbreeding Analysis - Comprehensive F-coefficient evaluation
  5. Genetic Analysis - Variance components, heritability, and EBVs
  6. NHIL-20 Detailed Analysis - Population substructure investigation
  7. Tilili In-Depth Analysis - Comprehensive egg production traits

2 Breed Management Context

🌿 Housing System & Environmental Factors

Understanding the production environment is critical when comparing Estimated Breeding Values (EBVs) across lines. The housing systems differ fundamentally between the Tilili line and all other ILRI lines.

2.0.1 Tilili Line Semi-Scavenging System

The Tilili ecotype is raised under a two-phase semi-scavenging system designed to simulate smallholder farming conditions:

  • Phase 1 (Weeks 0-9): Controlled/conventional housing for early growth and health monitoring
  • Phase 2 (Weeks 9-16): Free-range conditions in outdoor paddocks to assess adaptability
  • Restricted feeding (80%): Only 80% of standard feed ration to encourage natural foraging behavior
  • Mating ratio: 1:10 (1 male to 10 females) for natural mating
  • Semi-scavenging system: Supplemental feeding plus natural food sources (insects, seeds, vegetation)

2.0.2 ILRI Lines Conventional Housing

All other breeds (LIL-20, BIL-20, NHIL-20, WIL-20) are raised in conventional housing systems:

  • Indoor housing: Climate-controlled environment
  • Controlled feeding: Standardized feed rations with measured intake
  • Mating ratio: 1:4 (1 male to 4 females) as standard breeding practice
  • Biosecurity: Lower disease exposure through controlled entry

📋 Line-by-Line Housing and Mating Summary

Line Housing System Mating Ratio Environment Type
LIL-20 Conventional Housing 1:4 Conventional
BIL-20 Conventional Housing 1:4 Conventional
NHIL-20 Conventional Housing 1:4 Conventional
WIL-20 Conventional Housing 1:4 Conventional
Tilili Semi-Scavenging (Wk 0-9 Controlled, Wk 9-16 Free-Range) 1:10 Semi-Scavenging

3 Mating Ratio Analysis

🐓 Mating Ratios by Breeding Line

The breeding program uses different mating ratios based on line characteristics and management systems:

Line Type Mating Ratio Rationale
ILRI Lines (LIL-20, BIL-20, NHIL-20, WIL-20) 1:4 Controlled pens, higher selection intensity
Tilili (Local Ethiopian) 1:10 Natural mating, simulates smallholder conditions

Key considerations:

  • Genetic diversity: Using multiple sires prevents excessive inbreeding
  • Selection intensity: Lower ratio (1:4) allows stronger male selection; higher ratio (1:10) reduces male replacement needs
  • Effective population size (Ne): Higher Ne reduces genetic drift
  • Practical management: 1:10 ratio typical for semi-scavenging systems in smallholder farms

Formula for Effective Population Size:

Ne = (4 × Nm × Nf) / (Nm + Nf)

Where: Nm = number of males, Nf = number of females

  • With a 1:4 ratio: If Nm = 10, Nf = 40, then Ne = (4 × 10 × 40) / (10 + 40) = 32
  • With a 1:10 ratio: If Nm = 5, Nf = 50, then Ne = (4 × 5 × 50) / (5 + 50) = 18.2

3.1 Observed Mating Ratios by Line and Generation

mating_all <- bind_rows(
  compute_mating_ratio(lil_data, "LIL-20"),
  compute_mating_ratio(bil_data, "BIL-20"),
  compute_mating_ratio(nhil_data, "NHIL-20"),
  compute_mating_ratio(wil_data, "WIL-20"),
  compute_mating_ratio(tilili_data, "Tilili")
)

if (nrow(mating_all) > 0) {
  mating_all %>%
    kable(col.names = c("Line", "Generation", "N Sires", "N Dams", "N Offspring", "Ratio", "Mating Ratio"),
          align = c("l", "l", rep("r", 5))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#3182ce", color = "white")
}
Line Generation N Sires N Dams N Offspring Ratio Mating Ratio
LIL-20 G1 11 22 157 2.0 1:2
LIL-20 G2 12 43 446 3.6 1:4
BIL-20 G1 6 27 155 4.5 1:4
BIL-20 G2 8 39 342 4.9 1:5
NHIL-20 G1 5 24 151 4.8 1:5
NHIL-20 G2 6 27 244 4.5 1:4
WIL-20 G1 7 24 92 3.4 1:3
WIL-20 G2 9 23 306 2.6 1:3
Tilili G3 48 212 722 4.4 1:4
Tilili G4 44 289 4112 6.6 1:7
Tilili G5 16 87 1072 5.4 1:5
if (nrow(mating_all) > 0) {
  # Add target lines: 4 for ILRI lines, 10 for Tilili
  ggplot(mating_all, aes(x = Generation, y = Mating_Ratio, fill = Line)) +
    geom_bar(stat = "identity", position = "dodge", width = 0.7) +
    geom_hline(yintercept = 4, linetype = "dashed", color = "#3182ce", linewidth = 1) +
    geom_hline(yintercept = 10, linetype = "dashed", color = "#e53e3e", linewidth = 1) +
    annotate("text", x = 0.6, y = 4.5, label = "Target ILRI lines: 1:4", color = "#3182ce", fontface = "bold", hjust = 0, size = 3.5) +
    annotate("text", x = 0.6, y = 10.5, label = "Target Tilili: 1:10", color = "#e53e3e", fontface = "bold", hjust = 0, size = 3.5) +
    geom_text(aes(label = Mating_Ratio_Label), position = position_dodge(width = 0.7), 
              vjust = -0.5, size = 3.5, fontface = "bold") +
    scale_fill_manual(values = line_colors) +
    labs(title = "Observed Mating Ratio by Line and Generation",
         subtitle = "Target: 1:4 for ILRI lines (blue) | 1:10 for Tilili (red)",
         x = "Generation", y = "Females per Male", fill = "Line") +
    theme_report() +
    scale_y_continuous(expand = expansion(mult = c(0, 0.15)))
}

⚠️ Mating Ratio Deviations

If observed mating ratios deviate significantly from targets:

For ILRI Lines (Target 1:4):

  • Higher ratio (e.g., 1:6, 1:8): Each male serves more females, increasing inbreeding risk
  • Lower ratio (e.g., 1:2): Fewer offspring per male, reduced selection intensity

For Tilili (Target 1:10):

  • Higher ratio (e.g., 1:15): May exceed male’s reproductive capacity, reduce fertility
  • Lower ratio (e.g., 1:5): Higher male cost, deviates from smallholder practice

Recommendations:

  • ILRI Lines: Maintain 1:4 ratio for controlled selection and genetic diversity
  • Tilili: Maintain 1:10 ratio to simulate smallholder conditions and natural mating

4 Phenotypic Summary

4.1 Population Structure

gen_cols <- c("G0", "G1", "G2", "G3", "G4", "G5")
gen_with_data <- gen_cols[colSums(pheno_stats[, gen_cols]) > 0]

display_data <- pheno_stats %>% select(Line, Total, all_of(gen_with_data), Survival_Pct)

totals_row <- data.frame(Line = "TOTAL", Total = sum(pheno_stats$Total), stringsAsFactors = FALSE)
for (g in gen_with_data) { totals_row[[g]] <- sum(pheno_stats[[g]]) }
totals_row$Survival_Pct <- round(mean(pheno_stats$Survival_Pct), 1)

display_data <- bind_rows(display_data, totals_row)

display_data %>%
  kable(col.names = c("Line", "Total", gen_with_data, "Survival %"),
        align = c("l", rep("r", length(gen_with_data) + 2))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white") %>%
  row_spec(6, bold = TRUE, background = "#f7fafc")
Line Total G0 G1 G2 G3 G4 G5 Survival %
LIL-20 1215 612 157 446 0 0 0 74.7
BIL-20 797 300 155 342 0 0 0 92.8
NHIL-20 1116 721 151 244 0 0 0 62.5
WIL-20 662 264 92 306 0 0 0 94.9
Tilili 5906 0 0 0 722 4112 1072 100.0
TOTAL 9696 1897 555 1338 722 4112 1072 85.0

4.2 Body Weight at 16 Weeks

pheno_stats %>%
  select(Line, BW16_N, BW16_Mean, BW16_SD, BW16_Min, BW16_Max, BW16_CV) %>%
  kable(col.names = c("Line", "N", "Mean (g)", "SD (g)", "Min (g)", "Max (g)", "CV %"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#3182ce", color = "white")
Line N Mean (g) SD (g) Min (g) Max (g) CV %
LIL-20 793 981 224 231 1535 22.8
BIL-20 686 1354 281 392 2254 20.8
NHIL-20 641 1196 409 336 2204 34.2
WIL-20 601 1148 272 359 1875 23.7
Tilili 3783 1073 303 110 2210 28.2
all_pheno <- bind_rows(
  lil_data %>% select(Line, BW_16wk), bil_data %>% select(Line, BW_16wk),
  nhil_data %>% select(Line, BW_16wk), wil_data %>% select(Line, BW_16wk),
  tilili_data %>% select(Line, BW_16wk)
)

ggplot(all_pheno %>% filter(!is.na(BW_16wk)), aes(x = Line, y = BW_16wk, fill = Line)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  scale_fill_manual(values = line_colors) +
  labs(title = "Body Weight at 16 Weeks - Comparison Across Lines", x = "Line", y = "Body Weight (g)") +
  theme_report() + theme(legend.position = "none")

4.3 Phenotypic Trend

pheno_trend <- bind_rows(
  lil_data %>% select(Line, Generation, BW_16wk),
  bil_data %>% select(Line, Generation, BW_16wk),
  nhil_data %>% select(Line, Generation, BW_16wk),
  wil_data %>% select(Line, Generation, BW_16wk),
  tilili_data %>% select(Line, Generation, BW_16wk)
) %>%
  filter(!is.na(BW_16wk)) %>%
  group_by(Line, Generation) %>%
  summarise(
    N = n(),
    Mean_BW16 = mean(BW_16wk, na.rm = TRUE),
    SE_BW16 = sd(BW_16wk, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation))
y_min <- floor(min(pheno_trend$Mean_BW16 - pheno_trend$SE_BW16, na.rm = TRUE) / 100) * 100 - 100
y_max <- ceiling(max(pheno_trend$Mean_BW16 + pheno_trend$SE_BW16, na.rm = TRUE) / 100) * 100 + 150

ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = Mean_BW16 - SE_BW16, ymax = Mean_BW16 + SE_BW16), 
                width = 0.1, linewidth = 0.8) +
  geom_text(aes(label = round(Mean_BW16, 0)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
  scale_color_manual(values = line_colors) +
  labs(title = "Phenotypic Trend: Mean Body Weight at 16 Weeks by Generation",
       subtitle = "Error bars represent ± 1 standard error | Y-axis does not start at zero",
       x = "Generation", y = "Mean Body Weight (g)", color = "Line") +
  theme_report() +
  coord_cartesian(ylim = c(y_min, y_max))

4.4 Sexual Dimorphism

pheno_stats %>%
  select(Line, Male_BW16, Female_BW16, Dimorphism_Pct) %>%
  kable(col.names = c("Line", "Male BW_16wk (g)", "Female BW_16wk (g)", "Dimorphism (%)"),
        align = c("l", rep("r", 3))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
Line Male BW_16wk (g) Female BW_16wk (g) Dimorphism (%)
LIL-20 1125 866 29.9
BIL-20 1494 1200 24.5
NHIL-20 1427 995 43.4
WIL-20 1282 996 28.7
Tilili 1210 956 26.6

5 Enhanced Inbreeding Analysis

🧬 Inbreeding Coefficient (F) Analysis

The inbreeding coefficient (F) measures the probability that two alleles at a locus are identical by descent. This analysis uses enhanced thresholds to identify various levels of inbreeding:

Inbreeding Thresholds:

Threshold Equivalent Relationship Risk Level
F > 0% Any common ancestor Monitoring
F ≥ 6.25% First cousin mating Low
F ≥ 12.5% Half-sib mating Moderate
F ≥ 25% Full-sib mating High

Rate of Inbreeding (ΔF): The increase in F per generation should be kept below 1% to maintain long-term genetic diversity.

5.1 Detailed Inbreeding Analysis by Line and Generation

inbreeding_detailed <- bind_rows(
  compute_inbreeding_detailed(ebv_all, lil_data, "LIL-20"),
  compute_inbreeding_detailed(ebv_all, bil_data, "BIL-20"),
  compute_inbreeding_detailed(ebv_all, nhil_data, "NHIL-20"),
  compute_inbreeding_detailed(ebv_all, wil_data, "WIL-20"),
  compute_inbreeding_detailed(ebv_all, tilili_data, "Tilili")
)

if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  inbreeding_detailed %>%
    select(Line, Generation, N_Total, N_F_gt_0, Pct_F_gt_0, N_F_ge_6.25, Pct_F_ge_6.25,
           N_F_ge_12.5, Pct_F_ge_12.5, N_F_ge_25, Pct_F_ge_25, Mean_F, Median_F, Max_F) %>%
    kable(col.names = c("Line", "Gen", "N", "N(F>0%)", "%(F>0%)", 
                        "N(F≥6.25%)", "%(F≥6.25%)", "N(F≥12.5%)", "%(F≥12.5%)", 
                        "N(F≥25%)", "%(F≥25%)", "Mean F%", "Median F%", "Max F%"),
          align = c("l", "l", rep("r", 12))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE, font_size = 14) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}
Line Gen N N(F>0%) %(F>0%) N(F≥6.25%) %(F≥6.25%) N(F≥12.5%) %(F≥12.5%) N(F≥25%) %(F≥25%) Mean F% Median F% Max F%
LIL-20 G0 291 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
LIL-20 G1 134 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
LIL-20 G2 362 26 7.2 26 7.2 26 7.2 15 4.1 1.42 0.00 25.00
BIL-20 G0 269 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
BIL-20 G1 124 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
BIL-20 G2 292 75 25.7 75 25.7 75 25.7 0 0.0 3.21 0.00 12.50
NHIL-20 G0 333 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
NHIL-20 G1 93 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
NHIL-20 G2 215 59 27.4 59 27.4 59 27.4 9 4.2 3.95 0.00 25.00
WIL-20 G0 237 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
WIL-20 G1 87 0 0.0 0 0.0 0 0.0 0 0.0 0.00 0.00 0.00
WIL-20 G2 275 46 16.7 46 16.7 46 16.7 10 3.6 2.55 0.00 25.00
Tilili G3 503 357 71.0 47 9.3 20 4.0 1 0.2 2.08 0.83 25.00
Tilili G4 2546 2322 91.2 402 15.8 172 6.8 23 0.9 3.05 1.55 25.76
Tilili G5 787 785 99.7 140 17.8 8 1.0 0 0.0 3.67 2.87 14.47
if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  ggplot(inbreeding_detailed, aes(x = Generation, y = Mean_F, color = Line, group = Line)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 4) +
    geom_hline(yintercept = 12.5, linetype = "dashed", color = "#ed8936", linewidth = 0.8) +
    annotate("text", x = 1, y = 13.5, label = "Half-sib level (12.5%)", color = "#ed8936", 
             fontface = "italic", hjust = 0, size = 3.5) +
    scale_color_manual(values = line_colors) +
    labs(title = "Mean Inbreeding Coefficient by Line and Generation",
         subtitle = "Dashed line indicates half-sib mating level (F = 12.5%)",
         x = "Generation", y = "Mean Inbreeding Coefficient (F%)", color = "Line") +
    theme_report() +
    scale_y_continuous(limits = c(0, max(inbreeding_detailed$Mean_F, na.rm = TRUE) * 1.2))
}

5.2 Inbreeding Summary by Line

if (!is.null(inbreeding_detailed) && nrow(inbreeding_detailed) > 0) {
  inbreeding_summary <- inbreeding_detailed %>%
    group_by(Line) %>%
    summarise(
      N_Total_sum = sum(N_Total, na.rm = TRUE),
      # Calculate weighted mean manually: sum(x*w) / sum(w)
      Mean_F = round(sum(Mean_F * N_Total, na.rm = TRUE) / sum(N_Total, na.rm = TRUE), 2),
      Max_F = round(max(Max_F, na.rm = TRUE), 2),
      N_F_gt_0 = sum(N_F_gt_0, na.rm = TRUE),
      N_F_ge_12.5 = sum(N_F_ge_12.5, na.rm = TRUE),
      N_F_ge_25 = sum(N_F_ge_25, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    rename(N_Total = N_Total_sum) %>%
    mutate(
      Pct_F_gt_0 = round(N_F_gt_0 / N_Total * 100, 1),
      Pct_F_ge_12.5 = round(N_F_ge_12.5 / N_Total * 100, 1),
      Pct_F_ge_25 = round(N_F_ge_25 / N_Total * 100, 1)
    )
  
  inbreeding_summary %>%
    select(Line, N_Total, Mean_F, Max_F, Pct_F_gt_0, Pct_F_ge_12.5, Pct_F_ge_25) %>%
    kable(col.names = c("Line", "N Total", "Mean F%", "Max F%", "% with F>0%", "% with F≥12.5%", "% with F≥25%"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}
Line N Total Mean F% Max F% % with F>0% % with F≥12.5% % with F≥25%
BIL-20 685 1.37 12.50 10.9 10.9 0.0
LIL-20 787 0.65 25.00 3.3 3.3 1.9
NHIL-20 641 1.32 25.00 9.2 9.2 1.4
Tilili 3836 3.05 25.76 90.3 5.2 0.6
WIL-20 599 1.17 25.00 7.7 7.7 1.7

5.3 Population Inbreeding Distribution by Line

# Create inbreeding category distribution for all lines
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  
  # Categorize inbreeding levels
  ebv_with_categories <- ebv_all %>%
    filter(!is.na(Inbreeding_pct)) %>%
    mutate(
      F_Category = case_when(
        Inbreeding_pct == 0 ~ "Non-inbred (F = 0%)",
        Inbreeding_pct < 3.125 ~ "Below 2nd cousin (F < 3.125%)",
        Inbreeding_pct < 6.25 ~ "2nd cousin level (F < 6.25%)",
        Inbreeding_pct < 12.5 ~ "1st cousin level (F < 12.5%)",
        Inbreeding_pct < 25 ~ "Half-sib level (F < 25%)",
        TRUE ~ "Full-sib level (F ≥ 25%)"
      ),
      F_Category = factor(F_Category, levels = c(
        "Non-inbred (F = 0%)",
        "Below 2nd cousin (F < 3.125%)",
        "2nd cousin level (F < 6.25%)",
        "1st cousin level (F < 12.5%)",
        "Half-sib level (F < 25%)",
        "Full-sib level (F ≥ 25%)"
      ))
    )
  
  # Summary by line and category
  inbreeding_dist <- ebv_with_categories %>%
    group_by(Line, F_Category) %>%
    summarise(N_Animals = n(), .groups = "drop") %>%
    group_by(Line) %>%
    mutate(
      Percent = round(N_Animals / sum(N_Animals) * 100, 1),
      Cumulative = round(cumsum(N_Animals) / sum(N_Animals) * 100, 1)
    ) %>%
    ungroup()
  
  # Display for each line
  for (line_name in unique(inbreeding_dist$Line)) {
    cat("\n\n### ", line_name, " - Inbreeding Distribution\n\n", sep = "")
    
    line_dist <- inbreeding_dist %>% filter(Line == line_name)
    
    tbl <- line_dist %>%
      select(F_Category, N_Animals, Percent, Cumulative) %>%
      kable(col.names = c("Inbreeding Category", "N Animals", "Percent (%)", "Cumulative (%)"),
            align = c("l", rep("r", 3)), format = "html", escape = FALSE) %>%
      kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
      row_spec(0, bold = TRUE, background = line_colors[line_name], color = "white")
    
    cat(as.character(tbl))
    cat("\n\n")
  }
}

5.3.1 BIL-20 - Inbreeding Distribution

Inbreeding Category N Animals Percent (%) Cumulative (%)
Non-inbred (F = 0%) 610 89.1 89.1
Half-sib level (F < 25%) 75 10.9 100.0

5.3.2 LIL-20 - Inbreeding Distribution

Inbreeding Category N Animals Percent (%) Cumulative (%)
Non-inbred (F = 0%) 772 96.7 96.7
Half-sib level (F < 25%) 11 1.4 98.1
Full-sib level (F ≥ 25%) 15 1.9 100.0

5.3.3 NHIL-20 - Inbreeding Distribution

Inbreeding Category N Animals Percent (%) Cumulative (%)
Non-inbred (F = 0%) 582 90.8 90.8
Half-sib level (F < 25%) 50 7.8 98.6
Full-sib level (F ≥ 25%) 9 1.4 100.0

5.3.4 Tilili - Inbreeding Distribution

Inbreeding Category N Animals Percent (%) Cumulative (%)
Non-inbred (F = 0%) 408 10.5 10.5
Below 2nd cousin (F < 3.125%) 2233 57.5 68.0
2nd cousin level (F < 6.25%) 647 16.7 84.7
1st cousin level (F < 12.5%) 394 10.1 94.8
Half-sib level (F < 25%) 176 4.5 99.4
Full-sib level (F ≥ 25%) 24 0.6 100.0

5.3.5 WIL-20 - Inbreeding Distribution

Inbreeding Category N Animals Percent (%) Cumulative (%)
Non-inbred (F = 0%) 554 92.3 92.3
Half-sib level (F < 25%) 36 6.0 98.3
Full-sib level (F ≥ 25%) 10 1.7 100.0

5.4 Inbreeding Coefficient Distribution by Line

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_inbreeding <- ebv_all %>% filter(!is.na(Inbreeding_pct))
  
  ggplot(ebv_inbreeding, aes(x = Inbreeding_pct, fill = Line)) +
    geom_histogram(bins = 30, color = "white", alpha = 0.8) +
    geom_vline(xintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +
    geom_vline(xintercept = 6.25, linetype = "dashed", color = "#ed8936", linewidth = 0.8) +
    geom_vline(xintercept = 12.5, linetype = "dashed", color = "#e53e3e", linewidth = 0.8) +
    geom_vline(xintercept = 25, linetype = "dashed", color = "#9b2c2c", linewidth = 0.8) +
    facet_wrap(~Line, scales = "free_y", ncol = 2) +
    scale_fill_manual(values = line_colors) +
    labs(
      title = "Inbreeding Coefficient Distribution by Line",
      subtitle = "Dashed lines: Orange = 1st cousin (6.25%) | Red = Half-sib (12.5%) | Dark red = Full-sib (25%)",
      x = "Inbreeding Coefficient (F%)",
      y = "Number of Animals"
    ) +
    theme_report() +
    theme(legend.position = "none")
}

📋 Inbreeding Management Guidelines

Based on the inbreeding analysis across all lines:

Priority Recommendation
1. Selection Prioritize non-inbred or distantly related animals (F < 3.125%) for breeding decisions
2. Caution Limit use of half-sib level animals (F ≥ 12.5%) as breeding candidates
3. Avoid Avoid using full-sib level animals (F ≥ 25%) as parents to prevent inbreeding depression
4. Monitor Monitor effective population size (Ne) to prevent future inbreeding accumulation
5. Optimize Implement Optimal Contribution Selection (OCS) using MateSelect to balance genetic gain and diversity

Key thresholds to monitor:

  • F < 3.125%: Distantly related - preferred for breeding
  • F = 6.25%: First cousin level - acceptable but monitor
  • F = 12.5%: Half-sib level - limit usage
  • F = 25%: Full-sib level - avoid for breeding
  • ΔF < 1%: Target rate of inbreeding increase per generation

⚠️ Inbreeding Management Recommendations

Based on the analysis above:

  1. Monitor lines with Mean F > 5%: These require careful mate selection to avoid further increases
  2. Avoid mating relatives: Use pedigree information to prevent half-sib or closer matings
  3. Maintain target mating ratios: 1:4 for ILRI lines and 1:10 for Tilili to distribute genetic contributions appropriately
  4. Target ΔF < 1% per generation: Keep the rate of inbreeding accumulation low
  5. Consider rotational mating: Systematically rotate sire usage across generations

6 Genetic Analysis

6.1 Variance Components and Heritability

ℹ️ Analysis Method

Genetic parameters were estimated using WOMBAT software with an animal model. The model included fixed effects: Sex, Batch, Year, Month. Trait: body weight at 16 weeks.

if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  vc_summary %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "h²", "SE(h²)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#805ad5", color = "white") %>%
    row_spec(which(vc_summary$h2 > 0.6), bold = TRUE, background = "#fed7d7")
}
Line N Va SE(Va) Ve SE(Ve) Vp SE(h²)
LIL LIL-20 787 19579.0 4163.9 14212.6 2787.6 33791.6 0.579 0.097
BIL BIL-20 685 12995.1 5250.1 30647.0 4169.6 43642.2 0.298 0.110
NHIL NHIL-20 638 89703.6 0.0 28389.8 4796.9 118093.0 0.760 0.031
WIL WIL-20 599 15082.8 5071.0 24419.8 3824.7 39502.6 0.382 0.112
Tilili Tilili 3710 12914.1 2674.6 52893.5 2018.5 65807.5 0.196 0.037
if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
  ggplot(vc_summary, aes(x = Line, y = h2, fill = Line)) +
    geom_bar(stat = "identity", width = 0.6) +
    geom_errorbar(aes(ymin = h2 - h2_SE, ymax = h2 + h2_SE), width = 0.2) +
    geom_text(aes(label = h2), vjust = -0.8, size = 5, fontface = "bold") +
    geom_hline(yintercept = 0.45, linetype = "dashed", color = "orange", linewidth = 1) +
    annotate("text", x = 0.6, y = 0.47, label = "Typical upper range (h² = 0.45)", hjust = 0, color = "orange", size = 3.5) +
    scale_fill_manual(values = line_colors) +
    labs(title = "Heritability Estimates for Body Weight at 16 Weeks",
         subtitle = "Error bars show ± 1 SE | Dashed line indicates typical upper range",
         x = "Line", y = "Heritability (h²)") +
    theme_report() + theme(legend.position = "none") +
    scale_y_continuous(limits = c(0, max(vc_summary$h2 + vc_summary$h2_SE, na.rm = TRUE) * 1.2))
}

6.2 Breeding Structure Analysis

6.2.1 Number of Sires and Dams by Generation

parents_all <- bind_rows(
  compute_parents_by_gen(lil_data, "LIL-20"),
  compute_parents_by_gen(bil_data, "BIL-20"),
  compute_parents_by_gen(nhil_data, "NHIL-20"),
  compute_parents_by_gen(wil_data, "WIL-20"),
  compute_parents_by_gen(tilili_data, "Tilili")
)

parents_wide <- parents_all %>%
  pivot_wider(names_from = Generation, values_from = c(N_Sires, N_Dams), names_sep = "_")

parents_wide %>%
  kable(align = c("l", rep("r", ncol(parents_wide) - 1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
Line N_Sires_G1 N_Sires_G2 N_Sires_G3 N_Sires_G4 N_Sires_G5 N_Dams_G1 N_Dams_G2 N_Dams_G3 N_Dams_G4 N_Dams_G5
LIL-20 11 12 NA NA NA 22 43 NA NA NA
BIL-20 6 8 NA NA NA 27 39 NA NA NA
NHIL-20 5 6 NA NA NA 24 27 NA NA NA
WIL-20 7 9 NA NA NA 24 23 NA NA NA
Tilili NA NA 48 44 16 NA NA 212 289 87

6.2.2 Sire EBV Summary by Generation

sire_ebv_by_gen <- bind_rows(
  compute_sire_ebv_by_gen(lil_data, ebv_all, "LIL-20"),
  compute_sire_ebv_by_gen(bil_data, ebv_all, "BIL-20"),
  compute_sire_ebv_by_gen(nhil_data, ebv_all, "NHIL-20"),
  compute_sire_ebv_by_gen(wil_data, ebv_all, "WIL-20"),
  compute_sire_ebv_by_gen(tilili_data, ebv_all, "Tilili")
)

if (!is.null(sire_ebv_by_gen) && nrow(sire_ebv_by_gen) > 0) {
  sire_ebv_by_gen %>%
    kable(col.names = c("Line", "Generation", "N Sires", "Sires with EBV", "N Offspring", 
                        "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)"),
          align = c("l", "l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white")
}
Line Generation N Sires Sires with EBV N Offspring Mean EBV (g) SD (g) Min (g) Max (g)
LIL-20 G1 11 11 157 114.3 72.5 -58.3 215.0
LIL-20 G2 12 12 446 171.5 64.8 70.2 276.3
BIL-20 G1 6 6 155 65.9 57.7 -21.2 143.8
BIL-20 G2 8 8 342 53.0 66.7 -61.8 113.5
NHIL-20 G1 5 5 151 348.2 148.9 218.4 578.0
NHIL-20 G2 6 6 244 448.7 122.0 310.2 634.7
WIL-20 G1 7 7 92 94.4 76.0 -9.2 224.6
WIL-20 G2 9 9 306 110.3 87.8 12.2 269.1
Tilili G3 48 26 722 16.8 72.3 -82.1 169.0
Tilili G4 44 29 4112 38.1 75.1 -126.9 220.6
Tilili G5 16 14 1072 92.4 96.9 -82.4 255.7

6.3 EBV Summary Statistics

if (!is.null(ebv_stats) && nrow(ebv_stats) > 0) {
  ebv_stats %>%
    kable(col.names = c("Line", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)", "Mean F%", "N Inbred"),
          align = c("l", rep("r", 7))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
}
Line N Mean EBV (g) SD (g) Min (g) Max (g) Mean F% N Inbred
BIL-20 685 35.7 67.7 -217.2 207.8 1.37 75
LIL-20 798 71.1 111.9 -307.5 315.8 0.64 26
NHIL-20 641 175.6 298.0 -668.4 842.7 1.33 59
Tilili 3882 43.7 74.8 -182.3 312.6 3.03 3474
WIL-20 600 57.4 95.8 -243.0 293.7 1.17 46
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ggplot(ebv_all, aes(x = EBV, fill = Line)) +
    geom_histogram(bins = 30, alpha = 0.8, color = "white") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
    facet_wrap(~Line, scales = "free_y", ncol = 2) +
    scale_fill_manual(values = line_colors) +
    labs(title = "EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Red dashed line indicates EBV = 0", x = "EBV (g)", y = "Count") +
    theme_report() + theme(legend.position = "none")
}

6.4 Genetic Trend

all_pheno_with_id <- bind_rows(
  lil_data %>% select(Line, ID, Generation, BW_16wk),
  bil_data %>% select(Line, ID, Generation, BW_16wk),
  nhil_data %>% select(Line, ID, Generation, BW_16wk),
  wil_data %>% select(Line, ID, Generation, BW_16wk),
  tilili_data %>% select(Line, ID, Generation, BW_16wk)
)

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  ebv_with_gen <- ebv_all %>%
    mutate(ID_original = as.character(ID_original)) %>%
    left_join(all_pheno_with_id %>% select(Line, ID, Generation) %>% mutate(ID = as.character(ID)), 
              by = c("Line" = "Line", "ID_original" = "ID")) %>%
    filter(!is.na(Generation))
  
  genetic_trend <- ebv_with_gen %>%
    group_by(Line, Generation) %>%
    summarise(N = n(), Mean_EBV = mean(EBV, na.rm = TRUE),
              SE_EBV = sd(EBV, na.rm = TRUE) / sqrt(n()), .groups = "drop") %>%
    mutate(Generation = paste0("G", Generation))
}
if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
  ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(linewidth = 1.2) + geom_point(size = 4) +
    geom_errorbar(aes(ymin = Mean_EBV - SE_EBV, ymax = Mean_EBV + SE_EBV), width = 0.1, linewidth = 0.8) +
    geom_text(aes(label = round(Mean_EBV, 1)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
    scale_color_manual(values = line_colors) +
    labs(title = "Genetic Trend: Mean EBV by Generation",
         subtitle = "Error bars represent ± 1 SE",
         x = "Generation", y = "Mean EBV (g)", color = "Line") +
    theme_report() + scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)))
}

6.5 Top Breeding Animals

if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
  top_animals <- ebv_all %>%
    group_by(Line) %>%
    arrange(desc(EBV)) %>%
    slice_head(n = 10) %>%
    mutate(Rank = row_number()) %>%
    ungroup() %>%
    select(Line, Rank, ID_original, EBV, Accuracy, Inbreeding_pct) %>%
    mutate(EBV = round(EBV, 1),
           Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2)),
           Inbreeding_pct = ifelse(is.na(Inbreeding_pct), "-", round(Inbreeding_pct, 2)))
  
  top_animals %>%
    kable(col.names = c("Line", "Rank", "Animal ID", "EBV (g)", "Accuracy", "Inbreeding %"),
          align = c("l", "c", "l", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(which(top_animals$Rank == 1), bold = TRUE, background = "#c6f6d5")
}
Line Rank Animal ID EBV (g) Accuracy Inbreeding %
BIL-20 1 C-7908 207.8 0.71 12.50
BIL-20 2 1222 204.3 0.54 0.00
BIL-20 3 C-7918 192.8 0.65 0.00
BIL-20 4 B-3407 189.9 0.78 0.00
BIL-20 5 C-7943 188.5 0.66 0.00
BIL-20 6 C-3263 184.5 0.66 0.00
BIL-20 7 1426 183.2 0.54 0.00
BIL-20 8 C-7075 178.4 0.71 12.50
BIL-20 9 1287 174.6 0.54 0.00
BIL-20 10 B-3091 169.2 0.68 0.00
LIL-20 1 C-7742 315.8 0.80 0.00
LIL-20 2 C-7562 299.9 0.81 0.00
LIL-20 3 C-7660 297.5 0.81 0.00
LIL-20 4 C-7565 297.0 0.81 0.00
LIL-20 5 C-8084 292.5 0.80 0.00
LIL-20 6 C-8158 285.6 0.81 0.00
LIL-20 7 C-8142 283.4 0.80 0.00
LIL-20 8 C-8147 281.1 0.81 0.00
LIL-20 9 C-7719 277.3 0.80 0.00
LIL-20 10 B-3323 276.3 0.91 0.00
NHIL-20 1 C-7171 842.7 0.90 25.00
NHIL-20 2 C-7136 842.5 0.89 12.50
NHIL-20 3 C-7178 805.5 0.87 0.00
NHIL-20 4 C-7180 784.1 0.87 0.00
NHIL-20 5 C-7981 774.7 0.89 12.50
NHIL-20 6 C-7137 756.8 0.89 12.50
NHIL-20 7 C-7128 727.8 0.87 0.00
NHIL-20 8 C-7181 724.8 0.87 0.00
NHIL-20 9 2384 717.4 0.87 0.00
NHIL-20 10 C-7966 693.7 0.87 0.00
Tilili 1 6233 312.6 0.74 25.76
Tilili 2 6235 295.0 0.74 25.76
Tilili 3 2481 273.2 0.74 25.76
Tilili 4 2485 267.3 0.74 25.76
Tilili 5 1941 265.6 0.74 25.76
Tilili 6 1370 257.4 0.74 25.76
Tilili 7 1371 255.7 0.74 25.76
Tilili 8 257 255.7 0.83 0.98
Tilili 9 1054 254.2 0.74 25.76
Tilili 10 1055 253.2 0.74 25.76
WIL-20 1 C-7498 293.7 0.71 0.00
WIL-20 2 B-3346 288.2 0.78 0.00
WIL-20 3 C-8280 287.2 0.72 0.00
WIL-20 4 C-7497 277.2 0.71 0.00
WIL-20 5 C-7435 276.1 0.72 0.00
WIL-20 6 B-3236 269.1 0.86 0.00
WIL-20 7 C-3153 265.1 0.71 0.00
WIL-20 8 C-7510 263.7 0.72 0.00
WIL-20 9 B-3368 262.6 0.78 0.00
WIL-20 10 C-8283 261.2 0.72 0.00

7 NHIL-20 Detailed Analysis

🔍 NHIL-20 Requires Further Investigation

The NHIL-20 line shows an unusually wide EBV distribution compared to other lines:

  • EBV range: -668.4g to 842.7g (vs. ~500g range in other lines)
  • Heritability: 0.76 (higher than typical 0.25-0.45 for body weight)
  • High genetic variance: Va = 89,704 (2-7x higher than other lines)

This section provides a detailed breakdown to investigate potential population substructure that may be inflating heritability estimates.

7.1 NHIL-20 Breeding Structure

nhil_parents <- compute_parents_by_gen(nhil_data, "NHIL-20")

nhil_parents %>%
  kable(col.names = c("Line", "Generation", "N Sires", "N Dams"),
        align = c("l", "l", "r", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
Line Generation N Sires N Dams
NHIL-20 G1 5 24
NHIL-20 G2 6 27

7.2 NHIL-20 EBV Distribution by Generation

nhil_ebv_with_gen <- ebv_all %>%
  filter(Line == "NHIL-20") %>%
  mutate(ID_original = as.character(ID_original)) %>%
  left_join(nhil_data %>% select(ID, Generation) %>% mutate(ID = as.character(ID)),
            by = c("ID_original" = "ID")) %>%
  filter(!is.na(Generation)) %>%
  mutate(Generation = paste0("G", Generation))

nhil_ebv_by_gen <- nhil_ebv_with_gen %>%
  group_by(Generation) %>%
  summarise(N = n(), Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
            SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
            Min_EBV = round(min(EBV, na.rm = TRUE), 1),
            Max_EBV = round(max(EBV, na.rm = TRUE), 1), .groups = "drop")

nhil_ebv_by_gen %>%
  kable(col.names = c("Generation", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
Generation N Mean EBV (g) SD (g) Min (g) Max (g)
G0 333 -3.0 266.8 -668.4 717.4
G1 93 278.5 175.6 -280.3 656.2
G2 215 407.7 183.2 -307.5 842.7
ggplot(nhil_ebv_with_gen, aes(x = EBV, fill = Generation)) +
  geom_histogram(bins = 25, alpha = 0.8, color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  facet_wrap(~Generation, ncol = 3, scales = "free_y") +
  scale_fill_manual(values = c("G0" = "#3182ce", "G1" = "#38a169", "G2" = "#805ad5")) +
  labs(title = "NHIL-20: EBV Distribution by Generation",
       subtitle = "Red dashed line indicates EBV = 0", x = "EBV (g)", y = "Count") +
  theme_report() + theme(legend.position = "none")

7.3 NHIL-20 Population Substructure Detection (Gaussian Mixture Model)

nhil_ebv_data <- ebv_all %>%
  filter(Line == "NHIL-20") %>%
  mutate(ID_original = as.character(ID_original)) %>%
  left_join(nhil_data %>% 
              select(ID, Generation, Sire_ID, Dam_ID, Sex, BW_16wk) %>% 
              mutate(ID = as.character(ID)),
            by = c("ID_original" = "ID")) %>%
  filter(!is.na(EBV))

ebv_vector <- nhil_ebv_data$EBV
gmm_fit <- Mclust(ebv_vector, G = 1:6)
n_components <- gmm_fit$G
bic_values <- gmm_fit$BIC

cat("================================================================================\n")
## ================================================================================
cat("       GAUSSIAN MIXTURE MODEL RESULTS - NHIL-20\n")
##        GAUSSIAN MIXTURE MODEL RESULTS - NHIL-20
cat("================================================================================\n\n")
## ================================================================================
cat("Optimal number of components (subpopulations):", n_components, "\n\n")
## Optimal number of components (subpopulations): 2
nhil_ebv_data$Cluster <- as.factor(gmm_fit$classification)

if (gmm_fit$modelName %in% c("E", "V")) {
  cluster_sd <- sqrt(gmm_fit$parameters$variance$sigmasq)
  if (length(cluster_sd) == 1) { cluster_sd <- rep(cluster_sd, n_components) }
} else { cluster_sd <- rep(NA, n_components) }

cluster_params <- data.frame(
  Cluster = 1:n_components,
  Mean_EBV = round(gmm_fit$parameters$mean, 1),
  SD_EBV = round(cluster_sd, 1),
  Proportion = round(gmm_fit$parameters$pro * 100, 1)
)

cluster_params %>%
  kable(col.names = c("Cluster", "Mean EBV (g)", "SD (g)", "% of Population"),
        align = c("l", rep("r", 3)),
        caption = "GMM Cluster Parameters") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
GMM Cluster Parameters
Cluster Mean EBV (g) SD (g) % of Population
1 -151.1 185.2 33.8
2 342.1 185.2 66.2
ggplot(nhil_ebv_data, aes(x = EBV, fill = Cluster)) +
  geom_histogram(bins = 35, alpha = 0.8, color = "white", position = "identity") +
  geom_vline(xintercept = gmm_fit$parameters$mean, linetype = "dashed", 
             color = c("#3182ce", "#e53e3e", "#38a169", "#ed8936", "#805ad5", "#319795")[1:n_components],
             linewidth = 1) +
  scale_fill_manual(values = c("1" = "#3182ce", "2" = "#e53e3e", "3" = "#38a169", 
                                "4" = "#ed8936", "5" = "#805ad5", "6" = "#319795")) +
  labs(title = paste0("NHIL-20: Animals Classified into ", n_components, " Subpopulations"),
       subtitle = "Dashed lines indicate cluster means",
       x = "EBV (g)", y = "Count", fill = "Cluster") +
  theme_report()

🔍 GMM Analysis Conclusion for NHIL-20

Number of subpopulations detected: 2

if (n_components == 1) {
  cat("**Interpretation:** NHIL-20 appears to be a single homogeneous population.\n")
} else if (n_components == 2) {
  cat("**Interpretation:** NHIL-20 consists of **TWO distinct subpopulations**. This explains the bimodal EBV distribution and inflated heritability.\n\n")
  cat("**Recommendations:** Investigate the origin of animals in each cluster and consider analyzing each subpopulation separately.\n")
} else {
  cat(paste0("**Interpretation:** NHIL-20 consists of **", n_components, " distinct subpopulations**.\n"))
}

Interpretation: NHIL-20 consists of TWO distinct subpopulations. This explains the bimodal EBV distribution and inflated heritability.

Recommendations: Investigate the origin of animals in each cluster and consider analyzing each subpopulation separately.


8 Tilili In-Depth Analysis

🐔 About the Tilili Breeding Program

Tilili is a local Ethiopian chicken ecotype undergoing genetic improvement at ILRI to enhance productivity, climate resilience, and disease resistance while maintaining adaptation to smallholder scavenging conditions.

Program Phase Details
Phase 1: Indoor Brooding Weeks 0-9: Group pens, controlled conditions, wing tags for ID, weekly weights & health assessments, standardized nutrition (Sasso T451A guidelines)
Phase 2: Semi-Scavenging Weeks 9-19: Outdoor paddocks, 80% restricted feeding to encourage foraging, evaluation of heat tolerance & foraging efficiency
Phase 3: Egg Production Weeks 19-45: Selected birds transferred indoors, trap nest system for individual laying performance, monitoring fertility, hatchability & egg number
Breeding System Natural mating (1:10 male:female ratio), controlled half-sib mating, culling of low performers (fertility & egg number), pedigree tracking
Selection Criteria Body weight at 16 weeks, growth under semi-scavenging, disease resistance, climate adaptability, egg production traits
Target Environment Smallholder farms with semi-scavenging systems

Key Evaluation Point: Body weight at 16 weeks represents market age for smallholder systems and concludes the semi-scavenging evaluation period.

8.1 Housing and Feed Management Protocol

📍 Three-Phase Breeding Protocol

Phase 1: Indoor Brooding (Weeks 0-9)

  • Chicks reared in group pens under controlled conditions at ILRI’s Addis Ababa facility
  • Individual wing tagging for lifetime identification
  • Standardized nutrition following Sasso T451A female guidelines
  • Weekly weight measurements and health assessments
  • Week 8: Blood collection (genotyping, antibody profiling), cloacal swabs (pathogen screening), photographic documentation

Phase 2: Semi-Scavenging Evaluation (Weeks 9-19)

  • Birds transition to outdoor paddocks after ID verification and baseline weight recording
  • Feeding regime: 80% of standard guidelines to encourage natural foraging behavior
  • Weekly monitoring: Body weight, health status, environmental adaptation (heat tolerance, foraging efficiency)
  • Dual treatment: Indoor-only birds remain in controlled housing for comparison

Phase 3: Egg Production & Breeding (Weeks 19-45)

  • Selected birds (based on growth, disease resistance, climate adaptability) transferred to indoor housing at 19 weeks
  • 7-day acclimatization period to minimize stress
  • Trap nest system: Individual laying performance monitoring (automatic closing nest boxes)
  • Daily tracking: Egg weight, laying frequency, fertility rates, hatchability success
  • Breeding system: Natural mating at 1:10 male:female ratio, controlled half-sib mating for genetic diversity
  • Culling strategy: Less productive birds (low fertility, low egg number) replaced with new candidates
  • Detailed pedigree records maintained to prevent excessive inbreeding

8.1.1 Housing Management Summary

housing_protocol <- data.frame(
  Batch_Range = c("0-2", "3-13", "3-13"),
  Generation = c("G3", "G4", "G4-G5"),
  Treatment = c("Outdoor Only", "Indoor", "Outdoor"),
  Brooding_Phase = c("Indoor (Wk 0-9)", "Indoor (Wk 0-19)", "Indoor (Wk 0-9 or 0-11*)"),
  Outdoor_Phase = c("Outdoor (Wk 9-19)", "—", "Outdoor (Wk 9-19 or 11-19*)"),
  Release_Age = c("Week 9 (Day 63)", "—", "Week 9 or 11* (Day 63 or 77)"),
  Notes = c(
    "Pilot batches: outdoor only",
    "Entire growth period indoors",
    "*G5 released at Week 11"
  )
)

housing_protocol %>%
  kable(col.names = c("Batch Range", "Generation", "Treatment", "Brooding Phase", "Outdoor Phase", "Release Age", "Notes"),
        align = c("c", "c", "c", "l", "l", "c", "l")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white") %>%
  column_spec(3, bold = TRUE, color = "#285e61")
Batch Range Generation Treatment Brooding Phase Outdoor Phase Release Age Notes
0-2 G3 Outdoor Only Indoor (Wk 0-9) Outdoor (Wk 9-19) Week 9 (Day 63) Pilot batches: outdoor only
3-13 G4 Indoor Indoor (Wk 0-19) Entire growth period indoors
3-13 G4-G5 Outdoor Indoor (Wk 0-9 or 0-11*) Outdoor (Wk 9-19 or 11-19*) Week 9 or 11* (Day 63 or 77) *G5 released at Week 11
# Load feed intake data from Excel
feed_file <- "Analysis_with_wombat2/TililiFeed/Feed offered G345.xlsx"

# Initialize empty data frames in case loading fails
feed_long <- data.frame()
feed_summary_16wk <- data.frame()
feed_summary_gen <- data.frame()

# Try to load feed data
tryCatch({
  if (file.exists(feed_file)) {
    feed_raw <- read_excel(feed_file, sheet = "Sheet1")
    
    # Reshape from wide to long format
    feed_long <- feed_raw %>%
      rename(Environment = Enviroment) %>%
      pivot_longer(cols = starts_with("Week"), 
                   names_to = "Week", 
                   values_to = "Feed_g_per_bird_per_day") %>%
      mutate(
        Week_num = as.numeric(gsub("Week", "", Week)),
        Generation = paste0("G", Generation),
        Batch = as.character(Batch),
        Feed_kg_per_bird_per_week = (Feed_g_per_bird_per_day * 7) / 1000
      )
    
    # Summary: Total feed offered per bird (Weeks 1-16)
    feed_summary_16wk <- feed_long %>%
      filter(Week_num <= 16) %>%
      group_by(Generation, Batch, Environment) %>%
      summarise(
        Total_Feed_kg_per_bird = sum(Feed_kg_per_bird_per_week, na.rm = TRUE),
        Avg_Daily_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
        .groups = "drop"
      ) %>%
      arrange(Generation, Batch, Environment)
    
    # Overall summary by generation and environment
    feed_summary_gen <- feed_long %>%
      filter(Week_num <= 16) %>%
      group_by(Generation, Environment) %>%
      summarise(
        N_Batch_Treatment = n_distinct(Batch),
        Avg_Total_Feed_kg_per_bird = mean((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        Min_Feed_kg = min((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        Max_Feed_kg = max((Feed_g_per_bird_per_day * 7 / 1000) * 16, na.rm = TRUE),
        .groups = "drop"
      )
  }
}, error = function(e) {
  cat("Warning: Could not load feed data.\n")
})

8.1.2 Feed Intake Summary (Weeks 1-16)

if (nrow(feed_summary_gen) > 0) {
  feed_summary_gen %>%
    mutate(
      Avg_Total_Feed_kg_per_bird = round(Avg_Total_Feed_kg_per_bird, 2),
      Min_Feed_kg = round(Min_Feed_kg, 2),
      Max_Feed_kg = round(Max_Feed_kg, 2)
    ) %>%
    kable(col.names = c("Generation", "Environment", "N Batches", 
                        "Mean Feed (kg/bird)", "Min (kg)", "Max (kg)"),
          align = c("c", "c", "c", "r", "r", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white") %>%
    column_spec(4, bold = TRUE, color = "#234e52", background = "#e6fffa")
} else {
  cat("Feed summary data not available.\n")
}
Generation Environment N Batches Mean Feed (kg/bird) Min (kg) Max (kg)
G3 Outdoor 3 4.98 1.23 8.96
G4 Indoor 11 5.16 1.23 8.29
G4 Outdoor 11 4.42 1.23 6.72
G5 Outdoor 4 5.00 1.23 8.20

8.1.3 Feed Allowance by Week

# Calculate mean feed by generation and week
if (nrow(feed_long) > 0) {
  feed_weekly <- feed_long %>%
    filter(Week_num <= 19) %>%
    group_by(Generation, Week_num) %>%
    summarise(
      Mean_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
      Min_Feed_g = min(Feed_g_per_bird_per_day, na.rm = TRUE),
      Max_Feed_g = max(Feed_g_per_bird_per_day, na.rm = TRUE),
      .groups = "drop"
    )
  
  # Check if we have valid data
  if (nrow(feed_weekly) > 0 && !all(is.na(feed_weekly$Max_Feed_g))) {
    # Calculate y-axis maximum safely
    y_max <- max(feed_weekly$Max_Feed_g, na.rm = TRUE)
    y_breaks <- seq(0, ceiling(y_max / 20) * 20 + 20, by = 20)
    
    ggplot(feed_weekly, aes(x = Week_num, y = Mean_Feed_g, color = Generation, group = Generation)) +
      geom_line(linewidth = 1.2) +
      geom_point(size = 2.5) +
      geom_ribbon(aes(ymin = Min_Feed_g, ymax = Max_Feed_g, fill = Generation), alpha = 0.15, color = NA) +
      geom_vline(xintercept = 16, linetype = "dashed", color = "#CC3311", linewidth = 0.8) +
      annotate("text", x = 16.5, y = y_max * 0.95, 
               label = "Week 16\n(Evaluation)", hjust = 0, size = 3.5, color = "#CC3311", fontface = "bold") +
      scale_color_manual(values = gen_colors) +
      scale_fill_manual(values = gen_colors) +
      scale_x_continuous(breaks = seq(1, 19, by = 2)) +
      scale_y_continuous(breaks = y_breaks) +
      labs(
        title = "Feed Allowance Schedule by Generation (Weeks 1-19)",
        subtitle = "Shaded area shows range across batches/environments | G4 & G5 receive 1.2× baseline (G3)",
        x = "Week of Age",
        y = "Feed Allowance (g/bird/day)",
        color = "Generation",
        fill = "Generation"
      ) +
      theme_tilili()
  } else {
    cat("Note: Feed data not available for plotting.\n")
  }
} else {
  cat("Note: Feed data not available.\n")
}

# Feed by environment and generation
if (nrow(feed_long) > 0) {
  feed_env <- feed_long %>%
    filter(Week_num <= 16) %>%
    group_by(Generation, Environment, Week_num) %>%
    summarise(
      Mean_Feed_g = mean(Feed_g_per_bird_per_day, na.rm = TRUE),
      .groups = "drop"
    )
  
  ggplot(feed_env, aes(x = Week_num, y = Mean_Feed_g, color = Environment, linetype = Generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2) +
    scale_color_manual(values = c("Indoor" = "#3182ce", "Outdoor" = "#38a169")) +
    scale_x_continuous(breaks = seq(1, 16, by = 2)) +
    labs(
      title = "Feed Allowance by Environment and Generation (Weeks 1-16)",
      subtitle = "Indoor vs. Outdoor treatments | Outdoor birds supplement through foraging",
      x = "Week of Age",
      y = "Feed Allowance (g/bird/day)",
      color = "Environment",
      linetype = "Generation"
    ) +
    theme_tilili()
} else {
  cat("Note: Feed data not available for environment comparison plot.\n")
}

📊 Feed Conversion and Growth Efficiency

The feed allowance data presented here represents the offered feed per bird per day. Actual feed intake and feed conversion ratio (FCR) would require measurement of:

  1. Actual consumption: Feed disappearance (offered - refused/spillage)
  2. Body weight gain: Weekly weight measurements
  3. FCR calculation: Feed consumed (kg) ÷ Body weight gain (kg)

For outdoor birds, foraging contribution is not quantified but is expected to:

  • Reduce dependence on provided supplemental feed
  • Provide micronutrients (insects, greens)
  • Improve gut health and immune function
  • Lower feed costs for smallholder farmers

Future recommendations: Implement feed intake recording systems to estimate FCR by generation and housing system for economic evaluation.

✅ Key Feed Observations

  1. Generation 3 baseline: Lower feed allowance as these were layer-type birds (no 1.2× multiplier)
  2. Generations 4 & 5: Higher feed provision (1.2× baseline) to support dual-purpose genetics
  3. Environment effect: Outdoor birds received 80% of standard allowance while indoor birds received 100% (ad libitum)
  4. Foraging contribution: Outdoor birds compensate the 20% feed restriction through natural foraging (insects, seeds, vegetation)
  5. Evaluation age: All birds assessed at Week 16 for body weight

8.2 Tilili Body Weight Genetic Parameters

tilili_summary <- pheno_stats %>% filter(Line == "Tilili")
tilili_vc_data <- if(!is.null(vc_summary)) vc_summary %>% filter(Line == "Tilili") else NULL

# Tilili-specific stats
n_total_tilili <- nrow(tilili_data)
n_with_bw_tilili <- sum(!is.na(tilili_data$BW_16wk))
mean_bw_tilili <- round(mean(tilili_data$BW_16wk, na.rm = TRUE), 0)
sd_bw_tilili <- round(sd(tilili_data$BW_16wk, na.rm = TRUE), 0)
cv_bw_tilili <- round(sd_bw_tilili / mean_bw_tilili * 100, 1)

male_bw_tilili <- round(mean(tilili_data$BW_16wk[tilili_data$Sex == "M"], na.rm = TRUE), 0)
female_bw_tilili <- round(mean(tilili_data$BW_16wk[tilili_data$Sex == "F"], na.rm = TRUE), 0)
dimorphism_tilili <- round((male_bw_tilili - female_bw_tilili) / female_bw_tilili * 100, 1)

gen_counts_tilili <- table(tilili_data$Generation)

# Get heritability for Tilili
h2_tilili <- if(!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) tilili_vc_data$h2 else NA
h2_SE_tilili <- if(!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) tilili_vc_data$h2_SE else NA
5,906
Total Birds
3,783
With BW_16wk
1073g
Mean Weight
0.196
Heritability
3
Generations

8.2.1 Phenotypic Summary

data.frame(
  Metric = c("Total Birds", "Birds with BW_16wk", "Mean BW_16wk (g)", "SD BW_16wk (g)", 
             "CV (%)", "Min BW_16wk (g)", "Max BW_16wk (g)",
             "Male BW_16wk (g)", "Female BW_16wk (g)", "Sexual Dimorphism (%)"),
  Value = c(n_total_tilili, n_with_bw_tilili, mean_bw_tilili, sd_bw_tilili, cv_bw_tilili,
            round(min(tilili_data$BW_16wk, na.rm = TRUE), 0),
            round(max(tilili_data$BW_16wk, na.rm = TRUE), 0),
            male_bw_tilili, female_bw_tilili, dimorphism_tilili)
) %>%
  kable(align = c("l", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")
Metric Value
Total Birds 5906.0
Birds with BW_16wk 3783.0
Mean BW_16wk (g) 1073.0
SD BW_16wk (g) 303.0
CV (%) 28.2
Min BW_16wk (g) 110.0
Max BW_16wk (g) 2210.0
Male BW_16wk (g) 1210.0
Female BW_16wk (g) 956.0
Sexual Dimorphism (%) 26.6

8.2.2 Population Structure by Generation

tilili_data %>%
  group_by(Generation) %>%
  summarise(
    N = n(),
    N_BW = sum(!is.na(BW_16wk)),
    Mean_BW = round(mean(BW_16wk, na.rm = TRUE), 0),
    SD_BW = round(sd(BW_16wk, na.rm = TRUE), 0),
    N_Sires = n_distinct(Sire_ID[!is.na(Sire_ID)]),
    N_Dams = n_distinct(Dam_ID[!is.na(Dam_ID)]),
    .groups = "drop"
  ) %>%
  mutate(Generation = paste0("G", Generation)) %>%
  kable(col.names = c("Generation", "N Total", "N with BW", "Mean BW (g)", "SD (g)", "N Sires", "N Dams"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")
Generation N Total N with BW Mean BW (g) SD (g) N Sires N Dams
G3 722 458 1188 343 48 212
G4 4112 2537 1014 279 44 289
G5 1072 788 1198 295 16 87
tilili_data %>%
  filter(!is.na(BW_16wk)) %>%
  mutate(Generation = paste0("G", Generation)) %>%
  ggplot(aes(x = Generation, y = BW_16wk, fill = Generation)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21) +
  geom_jitter(alpha = 0.2, width = 0.2, size = 0.8) +
  scale_fill_manual(values = gen_colors) +
  labs(title = "Tilili: Body Weight at 16 Weeks by Generation",
       x = "Generation", y = "Body Weight (g)") +
  theme_tilili() + theme(legend.position = "none")

8.2.3 Variance Components and Heritability

if (!is.null(tilili_vc_data) && nrow(tilili_vc_data) > 0) {
  tilili_vc_data %>%
    kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "h²", "SE(h²)"),
          align = c("l", rep("r", 8))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
} else {
  cat("Variance component results not available for Tilili.")
}
Line N Va SE(Va) Ve SE(Ve) Vp SE(h²)
Tilili Tilili 3710 12914.1 2674.6 52893.5 2018.5 65807.5 0.196 0.037

8.2.4 Tilili EBV Summary

tilili_ebv <- ebv_all %>% filter(Line == "Tilili")

if (nrow(tilili_ebv) > 0) {
  data.frame(
    Metric = c("N Animals with EBV", "Mean EBV (g)", "SD EBV (g)", 
               "Min EBV (g)", "Max EBV (g)", "EBV Range (g)"),
    Value = c(nrow(tilili_ebv),
              round(mean(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(sd(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(min(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(max(tilili_ebv$EBV, na.rm = TRUE), 1),
              round(max(tilili_ebv$EBV, na.rm = TRUE) - min(tilili_ebv$EBV, na.rm = TRUE), 1))
  ) %>%
    kable(align = c("l", "r")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}
Metric Value
N Animals with EBV 3882.0
Mean EBV (g) 43.7
SD EBV (g) 74.8
Min EBV (g) -182.3
Max EBV (g) 312.6
EBV Range (g) 494.9
if (nrow(tilili_ebv) > 0) {
  ggplot(tilili_ebv, aes(x = EBV)) +
    geom_histogram(bins = 30, fill = "#319795", color = "white", alpha = 0.8) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) +
    geom_vline(xintercept = mean(tilili_ebv$EBV, na.rm = TRUE), 
               linetype = "solid", color = "#EE7733", linewidth = 1) +
    labs(title = "Tilili: EBV Distribution for Body Weight at 16 Weeks",
         subtitle = "Black dashed = 0 | Orange solid = Mean EBV",
         x = "EBV (g)", y = "Count") +
    theme_tilili()
}

8.2.5 Top 10 Tilili Breeding Animals (by EBV)

if (nrow(tilili_ebv) > 0) {
  tilili_ebv %>%
    arrange(desc(EBV)) %>%
    head(10) %>%
    mutate(
      Rank = 1:10,
      EBV = round(EBV, 1),
      SE_EBV = ifelse(is.na(SE_EBV), "-", round(SE_EBV, 1)),
      Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2))
    ) %>%
    select(Rank, ID_original, EBV, SE_EBV, Accuracy) %>%
    kable(col.names = c("Rank", "Animal ID", "EBV (g)", "SE", "Accuracy"),
          align = c("c", "l", rep("r", 3))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
    row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
    row_spec(1, bold = TRUE, background = "#c6f6d5")
}
Rank Animal ID EBV (g) SE Accuracy
Tilili.1733 1 6233 312.6 85.3 0.74
Tilili.1732 2 6235 295.0 85.3 0.74
Tilili.2637 3 2481 273.2 85.3 0.74
Tilili.2633 4 2485 267.3 85.3 0.74
Tilili.3075 5 1941 265.6 85.3 0.74
Tilili.3377 6 1370 257.4 85.3 0.74
Tilili.3376 7 1371 255.7 85.3 0.74
Tilili.703 8 257 255.7 63.7 0.83
Tilili.3528 9 1054 254.2 85.3 0.74
Tilili.3527 10 1055 253.2 85.3 0.74

8.3 Tilili Egg Weight Analysis

# Load and process egg weight data
df_egg_weight_raw <- tryCatch({
  read_csv(egg_weight_file, show_col_types = FALSE) %>%
    clean_names()
}, error = function(e) {
  data.frame()
})

if (nrow(df_egg_weight_raw) > 0) {
  week_cols_ew <- names(df_egg_weight_raw)[grepl("^week_", names(df_egg_weight_raw))]
  
  df_egg_weight_long <- df_egg_weight_raw %>%
    select(bird_id = chicken_id, sire_id, dam_id, generation = generation3, batch, pen, all_of(week_cols_ew)) %>%
    pivot_longer(cols = all_of(week_cols_ew), names_to = "week", values_to = "egg_weight") %>%
    mutate(
      week = as.numeric(str_extract(week, "\\d+")),
      egg_weight = na_if(egg_weight, 0),
      egg_weight = if_else(!is.na(egg_weight) & egg_weight > 100, NA_real_, egg_weight),
      generation = factor(generation, levels = sort(unique(generation)), labels = paste0("G", sort(unique(generation)))),
      batch = factor(batch)
    )
  
  n_birds_ew <- n_distinct(df_egg_weight_long$bird_id)
  n_records_ew <- sum(!is.na(df_egg_weight_long$egg_weight))
  mean_ew <- round(mean(df_egg_weight_long$egg_weight, na.rm = TRUE), 1)
  sd_ew <- round(sd(df_egg_weight_long$egg_weight, na.rm = TRUE), 1)
  n_gen_ew <- n_distinct(df_egg_weight_long$generation)
} else {
  n_birds_ew <- 0; n_records_ew <- 0; mean_ew <- NA; sd_ew <- NA; n_gen_ew <- 0
  df_egg_weight_long <- data.frame()
}
170
Birds
4,389
Valid Records
53.1g
Mean Weight
2
Generations
31-61
Week Range

8.3.1 Weekly Means by Generation

if (nrow(df_egg_weight_long) > 0) {
  weekly_gen_ew <- df_egg_weight_long %>%
    group_by(generation, week) %>%
    summarise(n = sum(!is.na(egg_weight)), mean = mean(egg_weight, na.rm = TRUE),
              sd = sd(egg_weight, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop") %>%
    filter(n > 0)
} else {
  weekly_gen_ew <- data.frame()
}
if (nrow(weekly_gen_ew) > 0) {
  ggplot(weekly_gen_ew, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2.0) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.3, alpha = 0.5) +
    scale_color_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 2)) +
    labs(title = "Tilili: Mean Egg Weight by Week of Age",
         subtitle = "Comparison by Generation | Error bars: ± 1 SE",
         x = "Week of Age", y = "Mean Egg Weight (g)", color = "Generation",
         caption = "Note: Zeros treated as missing | Outliers (>100g) removed") +
    theme_tilili()
}

8.3.2 Overall Generation Summary - Egg Weight

if (nrow(df_egg_weight_long) > 0) {
  df_egg_weight_long %>%
    group_by(generation) %>%
    summarise(n_birds = n_distinct(bird_id), n_records = sum(!is.na(egg_weight)),
              mean_egg_weight = round(mean(egg_weight, na.rm = TRUE), 1),
              sd_egg_weight = round(sd(egg_weight, na.rm = TRUE), 1),
              min_egg_weight = round(min(egg_weight, na.rm = TRUE), 1),
              max_egg_weight = round(max(egg_weight, na.rm = TRUE), 1), .groups = "drop") %>%
    kable(col.names = c("Generation", "N Birds", "N Records", "Mean (g)", "SD (g)", "Min (g)", "Max (g)"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}
Generation N Birds N Records Mean (g) SD (g) Min (g) Max (g)
G3 85 2069 52.3 4.6 39 69
G4 85 2320 53.8 4.6 35 74

8.3.3 Weekly Means by Batch - Egg Weight

if (nrow(df_egg_weight_long) > 0) {
  weekly_batch_ew <- df_egg_weight_long %>%
    group_by(batch, week) %>%
    summarise(n = sum(!is.na(egg_weight)), mean = mean(egg_weight, na.rm = TRUE), .groups = "drop") %>%
    filter(n > 0)
  
  ggplot(weekly_batch_ew, aes(x = week, y = mean, color = batch, group = batch)) +
    geom_line(linewidth = 0.8) +
    geom_point(size = 1.5, alpha = 0.7) +
    scale_color_manual(values = batch_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Mean Egg Weight by Week of Age",
         subtitle = "Comparison by Batch",
         x = "Week of Age", y = "Mean Egg Weight (g)", color = "Batch") +
    theme_tilili()
}


8.4 Tilili Egg Number Analysis

# Load and process egg number data
# Define the egg number file paths explicitly
egg_num_merged <- file.path(wombat_base, "Tililieggnum", "Merged_egg.csv")
egg_num_master <- file.path(wombat_base, "Tililieggnum", "Masterfile_egg.csv")

# Load egg number data - try Merged_egg first (it's actually ODS format despite .csv extension)
df_egg_number_raw <- NULL

# First, try to read Merged_egg.csv as ODS (it's actually in ODS format)
if (file.exists(egg_num_merged)) {
  df_egg_number_raw <- tryCatch({
    temp_data <- read_ods(egg_num_merged, sheet = 1, col_names = TRUE)
    if (nrow(temp_data) > 0) {
      temp_data %>% clean_names()
    } else {
      NULL
    }
  }, error = function(e) {
    NULL
  })
}

# If Merged_egg failed or empty, use Masterfile_egg.csv
if (is.null(df_egg_number_raw) || nrow(df_egg_number_raw) == 0) {
  if (file.exists(egg_num_master)) {
    df_egg_number_raw <- tryCatch({
      read_csv(egg_num_master, show_col_types = FALSE) %>% clean_names()
    }, error = function(e) {
      tryCatch({
        read.csv(egg_num_master, stringsAsFactors = FALSE) %>% clean_names()
      }, error = function(e2) {
        data.frame()
      })
    })
  }
}

# Process the data
if (!is.null(df_egg_number_raw) && nrow(df_egg_number_raw) > 0) {
  # Get week columns
  week_cols_en <- names(df_egg_number_raw)[grepl("^week_", names(df_egg_number_raw))]
  
  # Find bird_id column - check multiple possible names
  possible_id_cols <- c("birds_id", "bird_id", "chicken_id", "chickenid", "id")
  bird_id_col <- NULL
  for (col in possible_id_cols) {
    if (col %in% names(df_egg_number_raw)) {
      bird_id_col <- col
      break
    }
  }
  
  if (!is.null(bird_id_col) && length(week_cols_en) > 0) {
    # Convert week columns to numeric
    df_egg_number_raw <- df_egg_number_raw %>%
      mutate(across(all_of(week_cols_en), ~as.numeric(as.character(.))))
    
    # Select available columns
    core_cols <- c(bird_id_col, "generation", "batch")
    optional_cols <- c("sire_id", "dam_id", "pen")
    available_core <- intersect(core_cols, names(df_egg_number_raw))
    available_optional <- intersect(optional_cols, names(df_egg_number_raw))
    all_select_cols <- c(available_core, available_optional, week_cols_en)
    
    df_egg_number_long <- df_egg_number_raw %>%
      select(all_of(all_select_cols)) %>%
      rename(bird_id = all_of(bird_id_col)) %>%
      pivot_longer(cols = all_of(week_cols_en), names_to = "week", values_to = "egg_number") %>%
      mutate(
        week = as.numeric(str_extract(week, "\\d+")),
        egg_number = na_if(egg_number, 0),
        generation = factor(generation, levels = sort(unique(generation)), labels = paste0("G", sort(unique(generation)))),
        batch = factor(batch)
      )
    
    n_birds_en <- n_distinct(df_egg_number_long$bird_id)
    n_records_en <- sum(!is.na(df_egg_number_long$egg_number))
    mean_en <- round(mean(df_egg_number_long$egg_number, na.rm = TRUE), 2)
    sd_en <- round(sd(df_egg_number_long$egg_number, na.rm = TRUE), 2)
    n_gen_en <- n_distinct(df_egg_number_long$generation)
    
    cat("Egg number data loaded:", n_birds_en, "birds,", n_records_en, "records\n")
  } else {
    cat("Warning: Could not find bird_id column or week columns in egg number data\n")
    cat("Available columns:", paste(names(df_egg_number_raw), collapse = ", "), "\n")
    n_birds_en <- 0; n_records_en <- 0; mean_en <- NA; sd_en <- NA; n_gen_en <- 0
    df_egg_number_long <- data.frame()
  }
} else {
  cat("Warning: Could not load egg number data from either file\n")
  n_birds_en <- 0; n_records_en <- 0; mean_en <- NA; sd_en <- NA; n_gen_en <- 0
  df_egg_number_long <- data.frame()
}
## Egg number data loaded: 180 birds, 4892 records
180
Birds
4,892
Valid Records
4.33
Mean Eggs/Week
2
Generations
31-61
Week Range

8.4.1 Weekly Means by Generation

if (nrow(df_egg_number_long) > 0) {
  weekly_gen_en <- df_egg_number_long %>%
    group_by(generation, week) %>%
    summarise(n = sum(!is.na(egg_number)), mean = mean(egg_number, na.rm = TRUE),
              sd = sd(egg_number, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop") %>%
    filter(n > 0)
} else {
  weekly_gen_en <- data.frame()
}
if (nrow(weekly_gen_en) > 0) {
  ggplot(weekly_gen_en, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 2.0) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.3, alpha = 0.5) +
    scale_color_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 2)) +
    labs(title = "Tilili: Mean Egg Number by Week of Age",
         subtitle = "Comparison by Generation | Error bars: ± 1 SE",
         x = "Week of Age", y = "Mean Egg Number (eggs/week)", color = "Generation",
         caption = "Note: Zeros treated as missing") +
    theme_tilili()
}

8.4.2 Overall Generation Summary - Egg Number

if (nrow(df_egg_number_long) > 0) {
  df_egg_number_long %>%
    group_by(generation) %>%
    summarise(n_birds = n_distinct(bird_id), n_records = sum(!is.na(egg_number)),
              mean_egg_number = round(mean(egg_number, na.rm = TRUE), 2),
              sd_egg_number = round(sd(egg_number, na.rm = TRUE), 2),
              min_egg_number = round(min(egg_number, na.rm = TRUE), 2),
              max_egg_number = round(max(egg_number, na.rm = TRUE), 2), .groups = "drop") %>%
    kable(col.names = c("Generation", "N Birds", "N Records", "Mean", "SD", "Min", "Max"),
          align = c("l", rep("r", 6))) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
    row_spec(0, bold = TRUE, background = "#319795", color = "white")
}
Generation N Birds N Records Mean SD Min Max
G3 90 2228 4.37 1.67 1.0 7
G4 90 2664 4.29 1.67 0.2 7

8.4.3 Weekly Means by Batch - Egg Number

if (nrow(df_egg_number_long) > 0) {
  weekly_batch_en <- df_egg_number_long %>%
    group_by(batch, week) %>%
    summarise(n = sum(!is.na(egg_number)), mean = mean(egg_number, na.rm = TRUE), .groups = "drop") %>%
    filter(n > 0)
  
  ggplot(weekly_batch_en, aes(x = week, y = mean, color = batch, group = batch)) +
    geom_line(linewidth = 0.8) +
    geom_point(size = 1.5, alpha = 0.7) +
    scale_color_manual(values = batch_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Mean Egg Number by Week of Age",
         subtitle = "Comparison by Batch",
         x = "Week of Age", y = "Mean Egg Number (eggs/week)", color = "Batch") +
    theme_tilili()
}


8.5 Tilili Combined Trait Summary

if (nrow(weekly_gen_ew) > 0 && nrow(weekly_gen_en) > 0) {
  combined_weekly <- bind_rows(
    weekly_gen_ew %>% select(generation, week, mean, se) %>% mutate(trait = "Egg Weight (g)"),
    weekly_gen_en %>% select(generation, week, mean, se) %>% mutate(trait = "Egg Number")
  )
  
  ggplot(combined_weekly, aes(x = week, y = mean, color = generation, group = generation)) +
    geom_line(linewidth = 1.0) +
    geom_point(size = 1.5) +
    geom_ribbon(aes(ymin = mean - se, ymax = mean + se, fill = generation), alpha = 0.2, color = NA) +
    facet_wrap(~ trait, scales = "free_y", ncol = 1) +
    scale_color_manual(values = gen_colors) +
    scale_fill_manual(values = gen_colors) +
    scale_x_continuous(breaks = seq(31, 61, by = 5)) +
    labs(title = "Tilili: Egg Production Traits by Week of Age",
         subtitle = "Generation Comparison | Shaded area: ± 1 SE",
         x = "Week of Age", y = "Mean Value", color = "Generation", fill = "Generation") +
    theme_tilili() +
    theme(strip.text = element_text(face = "bold", size = 12))
}

8.5.1 Overall Trait Summary Table

data.frame(
  Trait = c("Body Weight (16 wk)", "Egg Weight (g)", "Egg Number"),
  N_Birds = c(n_with_bw_tilili, n_birds_ew, n_birds_en),
  N_Records = c(n_with_bw_tilili, n_records_ew, n_records_en),
  Overall_Mean = c(mean_bw_tilili, mean_ew, mean_en),
  Overall_SD = c(sd_bw_tilili, sd_ew, sd_en),
  Heritability = c(ifelse(!is.na(h2_tilili), h2_tilili, "N/A"), "TBD", "TBD")
) %>%
  kable(col.names = c("Trait", "N Birds", "N Records", "Mean", "SD", "h²"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#319795", color = "white")
Trait N Birds N Records Mean SD
Body Weight (16 wk) 3783 3783 1073.00 303.00 0.196
Egg Weight (g) 170 4389 53.10 4.60 TBD
Egg Number 180 4892 4.33 1.67 TBD

9 Limitations

⚠️ Important Limitations to Consider

  1. Small sample sizes: Population sizes range from 662 to 5906 animals per line, which limits statistical power.

  2. Single trait analysis for most lines: Only body weight at 16 weeks is analyzed for all lines. Tilili additionally includes egg production traits.

  3. Limited pedigree depth: Only a few generations. G0 founders have unknown parents.

  4. Limited relatedness: Small number of sires and dams creates unbalanced family structures.

  5. No genomic data: Pedigree-based relationships may not capture actual genetic relationships.

  6. NHIL-20 heritability concern: The high h² (0.76) may be inflated due to population substructure (2 subpopulations detected).

  7. Mating ratio variability: While targets are 1:4 (ILRI lines) and 1:10 (Tilili), actual observed ratios may vary by batch and generation.


10 Conclusions and Recommendations

10.1 Key Findings

✅ Summary of Results

Measure Finding
Total birds evaluated 9,696 across 5 lines
Lines analyzed LIL-20, BIL-20, NHIL-20, WIL-20, Tilili
Heritability range 0.196 to 0.76
Target mating ratio 1:4 (ILRI lines) / 1:10 (Tilili)
Genetic trend Positive for most lines
NHIL-20 subpopulations 2 distinct clusters detected

Tilili-Specific Findings:

Trait Value
Body Weight (16 wk) Mean: 1073g (SD: 303g, CV: 28.2%)
Heritability 0.196 ± 0.037
Egg Weight Mean: 53.1g across 4389 records
Egg Number Mean: 4.33 eggs/week across 4892 records

10.3 Future Direction: Optimal Mate Allocation with MateSelect

🧬 MateSelect: Balancing Genetic Gain and Inbreeding Control

MateSelect is a specialized software tool designed for optimal mate allocation in livestock breeding programs. It uses advanced optimization algorithms to identify the best mating combinations that maximize genetic progress while constraining the rate of inbreeding accumulation.

10.3.1 Why MateSelect?

Current challenges in the ILRI poultry breeding program:

  • Limited number of sires: With 1:4 (ILRI) and 1:10 (Tilili) mating ratios, careful sire allocation is critical
  • Inbreeding accumulation: Some lines show F > 10% in later generations
  • Suboptimal mate pairing: Random or intuitive mating decisions may not optimize genetic gain
  • Multiple trait goals: Balancing body weight, egg production, and adaptability requires sophisticated planning

10.3.2 Key Benefits

Benefit Impact
Maximize genetic gain 10-20% higher response to selection compared to random mating
Control inbreeding Keep ΔF < 1% per generation to preserve genetic diversity
Optimal contribution Balance individual EBVs with their relatedness to the population
Long-term sustainability Maintain effective population size (Ne) for future generations

💻 How MateSelect Works

MateSelect uses Optimal Contribution Selection (OCS) theory to solve a constrained optimization problem:

Objective: Maximize genetic gain = Σ cᵢ × EBVᵢ

Subject to: Rate of inbreeding ΔF ≤ target (e.g., 0.5% or 1%)

Where: - cᵢ = contribution (proportion of offspring) from animal i - EBVᵢ = Estimated Breeding Value of animal i - ΔF = (Fₜ₊₁ - Fₜ) / (1 - Fₜ) = rate of inbreeding increase

The software determines:

  1. Which animals to select as parents for the next generation
  2. How many offspring each parent should contribute
  3. Which specific matings to perform to minimize relationship between mates

📊 Expected Outcomes from Implementing MateSelect

Current Approach With MateSelect
Random/intuitive mating Optimized mate allocation
Uncontrolled inbreeding accumulation ΔF constrained to < 1% per generation
Variable genetic gain Maximized genetic response
Risk of genetic bottleneck Maintained effective population size
Single-trait focus Multi-trait optimization possible

Projected improvements:

  • Genetic gain: 15-25% higher genetic progress per generation
  • Inbreeding rate: Reduced by 30-50% compared to truncation selection
  • Long-term Ne: Maintained above critical threshold (Ne > 50)

🚀 Implementation Roadmap for MateSelect

Phase 1: Data Preparation

  • Export EBVs for all selection candidates from WOMBAT
  • Generate pedigree-based relationship matrix (A-matrix)
  • Define breeding objectives and trait weightings

Phase 2: Software Setup

  • Install MateSelect or equivalent OCS software (e.g., EVA, GENCONT)
  • Configure constraints: target ΔF, minimum/maximum contributions per animal
  • Set mating constraints: avoid full-sib matings, limit half-sib matings

Phase 3: Mate Allocation

  • Run optimization for each breeding cycle
  • Generate mate lists with specific sire × dam combinations
  • Implement matings according to optimal allocations

Phase 4: Monitoring & Feedback

  • Track realized inbreeding vs. predicted
  • Compare actual genetic gain to projections
  • Refine constraints and weightings based on outcomes

Timeline: Pilot implementation in 1-2 lines within the next breeding cycle, followed by program-wide adoption.

⚠️ Important Considerations

  1. Data quality: MateSelect requires accurate EBVs and complete pedigrees — data cleaning is essential
  2. Computational resources: Large populations may require significant processing time
  3. Practical constraints: Account for housing, availability, and reproductive capacity of selected animals
  4. Genetic diversity markers: Consider supplementing pedigree with genomic data for more accurate relationship estimation
  5. Training: Staff training on software operation and interpretation of results