library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(kableExtra)
library(scales)
theme_report <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0, color = "#1a365d"),
plot.subtitle = element_text(size = 11, hjust = 0, color = "#4a5568"),
legend.position = "bottom",
panel.grid.minor = element_blank(),
axis.title = element_text(face = "bold", size = 11),
strip.text = element_text(face = "bold", size = 11)
)
}
theme_set(theme_report())# =============================================================================
# SET YOUR WORKING DIRECTORY - MODIFY IF NEEDED
# =============================================================================
working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Poultryaudit/Dataset"
setwd(working_dir)
# File paths for phenotypic data
pheno_file_3lines <- "Additional_Poultry dataset_audit.xlsx"
pheno_file_wil <- "Dataset_evaluation_corrected.xlsx"
# Base directory for WOMBAT results
wombat_base <- "Analysis_with_wombat2"
model_dir <- "Model1_Animal_YearMonth"# =============================================================================
# LOAD PHENOTYPIC DATA FOR ALL 4 LINES
# =============================================================================
clean_names <- function(df) {
colnames(df) <- gsub(" ", "_", colnames(df))
return(df)
}
convert_types <- function(df) {
df$Generation <- as.character(df$Generation)
if ("Batch" %in% colnames(df)) df$Batch <- as.character(df$Batch)
df$ID <- as.character(df$ID)
if ("Sire_ID" %in% colnames(df)) df$Sire_ID <- as.character(df$Sire_ID)
if ("Dam_ID" %in% colnames(df)) df$Dam_ID <- as.character(df$Dam_ID)
return(df)
}
# Load LIL, BIL, NHIL from one file
lil_data <- read_excel(pheno_file_3lines, sheet = "LIL") %>% clean_names() %>% convert_types()
bil_data <- read_excel(pheno_file_3lines, sheet = "BIL") %>% clean_names() %>% convert_types()
nhil_data <- read_excel(pheno_file_3lines, sheet = "NHIL") %>% clean_names() %>% convert_types()
# Load WIL from separate file
wil_data <- read_excel(pheno_file_wil) %>% clean_names() %>% convert_types()
# Add Line identifier
lil_data$Line <- "LIL-20"
bil_data$Line <- "BIL-20"
nhil_data$Line <- "NHIL-20"
wil_data$Line <- "WIL-20"# =============================================================================
# EXTRACT GENETIC PARAMETERS FROM WOMBAT OUTPUT
# =============================================================================
lines <- c("LIL", "BIL", "NHIL", "WIL")
all_vc <- list()
all_ebv <- list()
extract_wombat <- function(line_code) {
sum_est_file <- file.path(wombat_base, line_code, model_dir, "SumEstimates.out")
ebv_file <- file.path(wombat_base, line_code, model_dir, "RnSoln_animal.dat")
id_map_file <- file.path(wombat_base, line_code, "ID_mapping.csv")
result <- list(vc = NULL, ebv = NULL)
if (!file.exists(sum_est_file)) {
cat(" Warning:", line_code, "- SumEstimates.out not found\n")
return(result)
}
# ----- EXTRACT VARIANCE COMPONENTS -----
sum_lines <- readLines(sum_est_file)
covs_a_idx <- grep("COVS A 1 1.*vrat", sum_lines)
if (length(covs_a_idx) > 0) {
covs_a_line <- sum_lines[covs_a_idx[1]]
nums <- as.numeric(unlist(regmatches(covs_a_line, gregexpr("[0-9]+\\.?[0-9]*", covs_a_line))))
if (length(nums) >= 7) {
Va <- nums[4]
Va_SE <- nums[5]
h2 <- nums[6]
h2_SE <- nums[7]
}
}
covs_z_idx <- grep("COVS Z 1 1.*vrat", sum_lines)
if (length(covs_z_idx) > 0) {
covs_z_line <- sum_lines[covs_z_idx[1]]
nums_z <- as.numeric(unlist(regmatches(covs_z_line, gregexpr("[0-9]+\\.?[0-9]*", covs_z_line))))
if (length(nums_z) >= 5) {
Ve <- nums_z[4]
Ve_SE <- nums_z[5]
}
}
covs_t_idx <- grep("COVS T 1 1", sum_lines)
if (length(covs_t_idx) > 0) {
covs_t_line <- sum_lines[covs_t_idx[1]]
nums_t <- as.numeric(unlist(regmatches(covs_t_line, gregexpr("[0-9]+\\.?[0-9]*", covs_t_line))))
if (length(nums_t) >= 5) {
Vp <- nums_t[4]
}
}
rec_idx <- grep("No. of records", sum_lines)
if (length(rec_idx) > 0) {
rec_line <- sum_lines[rec_idx[1]]
nums_rec <- as.numeric(unlist(regmatches(rec_line, gregexpr("[0-9]+", rec_line))))
N_rec <- nums_rec[1]
}
if (exists("h2")) {
result$vc <- data.frame(
Line = paste0(line_code, "-20"),
N = ifelse(exists("N_rec"), N_rec, NA),
Va = ifelse(exists("Va"), round(Va, 1), NA),
Va_SE = ifelse(exists("Va_SE"), round(Va_SE, 1), NA),
Ve = ifelse(exists("Ve"), round(Ve, 1), NA),
Ve_SE = ifelse(exists("Ve_SE"), round(Ve_SE, 1), NA),
Vp = ifelse(exists("Vp"), round(Vp, 1), NA),
h2 = round(h2, 3),
h2_SE = round(h2_SE, 3)
)
}
# ----- EXTRACT EBVs -----
if (file.exists(ebv_file) && file.exists(id_map_file)) {
ebv_lines <- readLines(ebv_file)
header_idx <- grep("Run_No", ebv_lines)
if (length(header_idx) > 0) {
data_start <- header_idx[1] + 1
data_lines <- ebv_lines[data_start:length(ebv_lines)]
data_lines <- data_lines[nchar(trimws(data_lines)) > 0]
ebv_data <- read.table(text = data_lines, header = FALSE, fill = TRUE)
if (ncol(ebv_data) >= 7) {
colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "SE_EBV", "Accuracy", "Inbreeding_pct")
} else {
colnames(ebv_data) <- c("Run_No", "ID_num", "Trait", "EBV", "Inbreeding_pct")[1:ncol(ebv_data)]
}
id_map <- read.csv(id_map_file, stringsAsFactors = FALSE)
ebv_merged <- merge(ebv_data, id_map, by.x = "ID_num", by.y = "ID_num", all.x = TRUE)
result$ebv <- data.frame(
Line = paste0(line_code, "-20"),
ID_original = as.character(ebv_merged$ID_orig),
ID_wombat = ebv_merged$ID_num,
EBV = ebv_merged$EBV,
SE_EBV = if ("SE_EBV" %in% colnames(ebv_merged)) ebv_merged$SE_EBV else NA,
Accuracy = if ("Accuracy" %in% colnames(ebv_merged)) ebv_merged$Accuracy else NA,
Inbreeding_pct = if ("Inbreeding_pct" %in% colnames(ebv_merged)) ebv_merged$Inbreeding_pct else NA
)
}
}
return(result)
}
for (ln in lines) {
res <- extract_wombat(ln)
if (!is.null(res$vc)) all_vc[[ln]] <- res$vc
if (!is.null(res$ebv)) all_ebv[[ln]] <- res$ebv
}
vc_summary <- do.call(rbind, all_vc)
ebv_all <- do.call(rbind, all_ebv)# =============================================================================
# COMPUTE PHENOTYPIC SUMMARY STATISTICS
# =============================================================================
compute_pheno_stats <- function(data, line_name) {
total <- nrow(data)
g0 <- sum(data$Generation == "0", na.rm = TRUE)
g1 <- sum(data$Generation == "1", na.rm = TRUE)
g2 <- sum(data$Generation == "2", na.rm = TRUE)
deaths <- sum(!is.na(data$Cull_Reason) & data$Cull_Reason != "", na.rm = TRUE)
survival <- round((1 - deaths / total) * 100, 1)
bw16_n <- sum(!is.na(data$BW_16wk))
bw16_mean <- round(mean(data$BW_16wk, na.rm = TRUE), 0)
bw16_sd <- round(sd(data$BW_16wk, na.rm = TRUE), 0)
bw16_min <- round(min(data$BW_16wk, na.rm = TRUE), 0)
bw16_max <- round(max(data$BW_16wk, na.rm = TRUE), 0)
bw16_cv <- round(bw16_sd / bw16_mean * 100, 1)
male_bw <- data %>% filter(Sex == "M") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
female_bw <- data %>% filter(Sex == "F") %>% pull(BW_16wk) %>% mean(na.rm = TRUE) %>% round(0)
dimorphism <- round((male_bw - female_bw) / female_bw * 100, 1)
afe_col <- colnames(data)[grepl("AFE", colnames(data), ignore.case = TRUE)][1]
if (!is.na(afe_col)) {
egg_n <- sum(!is.na(data[[afe_col]]))
} else {
egg_n <- 0
}
n_females <- sum(data$Sex == "F", na.rm = TRUE)
egg_pct <- ifelse(n_females > 0, round(egg_n / n_females * 100, 1), 0)
data.frame(
Line = line_name,
Total = total,
G0 = g0,
G1 = g1,
G2 = g2,
Survival_Pct = survival,
BW16_N = bw16_n,
BW16_Mean = bw16_mean,
BW16_SD = bw16_sd,
BW16_Min = bw16_min,
BW16_Max = bw16_max,
BW16_CV = bw16_cv,
Male_BW16 = male_bw,
Female_BW16 = female_bw,
Dimorphism_Pct = dimorphism,
Egg_N = egg_n,
Egg_Pct_Females = egg_pct
)
}
pheno_stats <- bind_rows(
compute_pheno_stats(lil_data, "LIL-20"),
compute_pheno_stats(bil_data, "BIL-20"),
compute_pheno_stats(nhil_data, "NHIL-20"),
compute_pheno_stats(wil_data, "WIL-20")
)
ebv_stats <- ebv_all %>%
group_by(Line) %>%
summarise(
N = n(),
Mean_EBV = round(mean(EBV, na.rm = TRUE), 1),
SD_EBV = round(sd(EBV, na.rm = TRUE), 1),
Min_EBV = round(min(EBV, na.rm = TRUE), 1),
Max_EBV = round(max(EBV, na.rm = TRUE), 1),
Mean_F = round(mean(Inbreeding_pct, na.rm = TRUE), 2),
N_Inbred = sum(Inbreeding_pct > 0, na.rm = TRUE),
.groups = "drop"
)
total_birds <- sum(pheno_stats$Total)This report presents descriptive statistics and genetic analysis results for four ILRI poultry lines: LIL-20, BIL-20, NHIL-20, and WIL-20. The evaluation encompasses phenotypic characterization across three generations (G0, G1, G2) and genetic parameter estimation using WOMBAT software for body weight at 16 weeks.
This genetic evaluation was initiated for internal audit purposes to assess the current status of ILRIโs poultry breeding program. The analysis establishes baseline genetic parameters and identifies areas for improvement. As the program matures and more data becomes available, this evaluation process will be updated in real time to track genetic progress and inform breeding decisions.
Current Scope:
To ensure continuous genetic progress and timely selection decisions, this genetic evaluation should be conducted twice per year (biannually). Regular evaluations enable:
pheno_stats %>%
select(Line, Total, G0, G1, G2, Survival_Pct) %>%
bind_rows(
data.frame(
Line = "TOTAL",
Total = sum(pheno_stats$Total),
G0 = sum(pheno_stats$G0),
G1 = sum(pheno_stats$G1),
G2 = sum(pheno_stats$G2),
Survival_Pct = round(mean(pheno_stats$Survival_Pct), 1)
)
) %>%
kable(col.names = c("Line", "Total Birds", "G0", "G1", "G2", "Survival %"),
align = c("l", rep("r", 5))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#2d3748", color = "white") %>%
row_spec(5, bold = TRUE, background = "#f7fafc")| Line | Total Birds | G0 | G1 | G2 | Survival % |
|---|---|---|---|---|---|
| LIL-20 | 1215 | 612 | 157 | 446 | 74.7 |
| BIL-20 | 797 | 300 | 155 | 342 | 92.8 |
| NHIL-20 | 1116 | 721 | 151 | 244 | 62.5 |
| WIL-20 | 662 | 264 | 92 | 306 | 94.9 |
| TOTAL | 3790 | 1897 | 555 | 1338 | 81.2 |
pheno_stats %>%
select(Line, G0, G1, G2) %>%
pivot_longer(-Line, names_to = "Generation", values_to = "N") %>%
ggplot(aes(x = Line, y = N, fill = Generation)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
geom_text(aes(label = N), position = position_dodge(width = 0.7), vjust = -0.5, size = 3.5) +
scale_fill_manual(values = c("G0" = "#3182ce", "G1" = "#38a169", "G2" = "#805ad5")) +
labs(title = "Population Structure by Line and Generation",
x = "Line", y = "Number of Birds") +
scale_y_continuous(expand = expansion(mult = c(0, 0.15)))pheno_stats %>%
select(Line, BW16_N, BW16_Mean, BW16_SD, BW16_Min, BW16_Max, BW16_CV) %>%
kable(col.names = c("Line", "N", "Mean (g)", "SD (g)", "Min (g)", "Max (g)", "CV %"),
align = c("l", rep("r", 6))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#3182ce", color = "white")| Line | N | Mean (g) | SD (g) | Min (g) | Max (g) | CV % |
|---|---|---|---|---|---|---|
| LIL-20 | 793 | 981 | 224 | 231 | 1535 | 22.8 |
| BIL-20 | 686 | 1354 | 281 | 392 | 2254 | 20.8 |
| NHIL-20 | 641 | 1196 | 409 | 336 | 2204 | 34.2 |
| WIL-20 | 601 | 1148 | 272 | 359 | 1875 | 23.7 |
all_pheno <- bind_rows(
lil_data %>% select(Line, BW_16wk),
bil_data %>% select(Line, BW_16wk),
nhil_data %>% select(Line, BW_16wk),
wil_data %>% select(Line, BW_16wk)
)
ggplot(all_pheno %>% filter(!is.na(BW_16wk)), aes(x = Line, y = BW_16wk, fill = Line)) +
geom_boxplot(alpha = 0.8, outlier.shape = 21) +
scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Body Weight at 16 Weeks - Comparison Across Lines",
x = "Line", y = "Body Weight (g)") +
theme(legend.position = "none")pheno_trend <- bind_rows(
lil_data %>% select(Line, Generation, BW_16wk),
bil_data %>% select(Line, Generation, BW_16wk),
nhil_data %>% select(Line, Generation, BW_16wk),
wil_data %>% select(Line, Generation, BW_16wk)
) %>%
filter(!is.na(BW_16wk)) %>%
group_by(Line, Generation) %>%
summarise(
N = n(),
Mean_BW16 = mean(BW_16wk, na.rm = TRUE),
SE_BW16 = sd(BW_16wk, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
) %>%
mutate(Generation = paste0("G", Generation))pheno_trend_wide <- pheno_trend %>%
select(Line, Generation, N, Mean_BW16) %>%
mutate(Mean_BW16 = round(Mean_BW16, 0)) %>%
pivot_wider(names_from = Generation, values_from = c(N, Mean_BW16), names_sep = "_")
pheno_trend_wide %>%
kable(align = c("l", rep("r", ncol(pheno_trend_wide) - 1))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#3182ce", color = "white")| Line | N_G0 | N_G1 | N_G2 | Mean_BW16_G0 | Mean_BW16_G1 | Mean_BW16_G2 |
|---|---|---|---|---|---|---|
| BIL-20 | 269 | 124 | 293 | 1422 | 1346 | 1294 |
| LIL-20 | 295 | 135 | 363 | 933 | 949 | 1031 |
| NHIL-20 | 334 | 91 | 216 | 1062 | 1171 | 1415 |
| WIL-20 | 237 | 87 | 277 | 1085 | 1112 | 1214 |
ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
geom_line(linewidth = 1.2) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = Mean_BW16 - SE_BW16, ymax = Mean_BW16 + SE_BW16),
width = 0.1, linewidth = 0.8) +
geom_text(aes(label = round(Mean_BW16, 0)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Phenotypic Trend: Mean Body Weight at 16 Weeks by Generation",
subtitle = "Error bars represent ยฑ 1 standard error",
x = "Generation", y = "Mean Body Weight (g)", color = "Line") +
theme(legend.position = "bottom") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))The phenotypic trend shows the change in mean body weight at 16 weeks across generations. An upward trend suggests phenotypic improvement, though this includes both genetic and environmental effects. Note that G0 represents foundation stock, while G1 and G2 are subsequent breeding generations.
pheno_stats %>%
select(Line, Male_BW16, Female_BW16, Dimorphism_Pct) %>%
kable(col.names = c("Line", "Male BW_16wk (g)", "Female BW_16wk (g)", "Dimorphism (%)"),
align = c("l", rep("r", 3))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(0, bold = TRUE, background = "#2d3748", color = "white")| Line | Male BW_16wk (g) | Female BW_16wk (g) | Dimorphism (%) |
|---|---|---|---|
| LIL-20 | 1125 | 866 | 29.9 |
| BIL-20 | 1494 | 1200 | 24.5 |
| NHIL-20 | 1427 | 995 | 43.4 |
| WIL-20 | 1282 | 996 | 28.7 |
Genetic parameters were estimated using WOMBAT software with an animal model. The model included the following fixed effects:
The trait analyzed is body weight at 16 weeks (BW_16wk).
if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
vc_summary %>%
kable(col.names = c("Line", "N", "Va", "SE(Va)", "Ve", "SE(Ve)", "Vp", "hยฒ", "SE(hยฒ)"),
align = c("l", rep("r", 8))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#805ad5", color = "white") %>%
row_spec(which(vc_summary$h2 > 0.6), bold = TRUE, background = "#fed7d7")
} else {
cat("WOMBAT results not available. Please ensure Analysis_with_wombat2 folder contains completed analyses.\n")
}| Line | N | Va | SE(Va) | Ve | SE(Ve) | Vp | hยฒ | SE(hยฒ) | |
|---|---|---|---|---|---|---|---|---|---|
| LIL | LIL-20 | 787 | 19579.0 | 4163.9 | 14212.6 | 2787.6 | 33791.6 | 0.579 | 0.097 |
| BIL | BIL-20 | 685 | 12995.1 | 5250.1 | 30647.0 | 4169.6 | 43642.2 | 0.298 | 0.110 |
| NHIL | NHIL-20 | 638 | 89703.6 | 0.0 | 28389.8 | 4796.9 | 118093.0 | 0.760 | 0.031 |
| WIL | WIL-20 | 599 | 15082.8 | 5071.0 | 24419.8 | 3824.7 | 39502.6 | 0.382 | 0.112 |
if (!is.null(vc_summary) && nrow(vc_summary) > 0) {
ggplot(vc_summary, aes(x = Line, y = h2, fill = Line)) +
geom_bar(stat = "identity", width = 0.6) +
geom_errorbar(aes(ymin = h2 - h2_SE, ymax = h2 + h2_SE), width = 0.2) +
geom_text(aes(label = h2), vjust = -0.8, size = 5, fontface = "bold") +
geom_hline(yintercept = 0.45, linetype = "dashed", color = "orange", linewidth = 1) +
annotate("text", x = 0.6, y = 0.47, label = "Typical upper range (hยฒ = 0.45)", hjust = 0, color = "orange", size = 3.5) +
scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Heritability Estimates for Body Weight at 16 Weeks",
subtitle = "Error bars show ยฑ 1 SE | Dashed line indicates typical upper range",
x = "Line", y = "Heritability (hยฒ)") +
theme(legend.position = "none") +
scale_y_continuous(limits = c(0, max(vc_summary$h2 + vc_summary$h2_SE, na.rm = TRUE) * 1.2),
expand = expansion(mult = c(0, 0.05)))
}if (!is.null(ebv_stats) && nrow(ebv_stats) > 0) {
ebv_stats %>%
kable(col.names = c("Line", "N", "Mean EBV (g)", "SD (g)", "Min (g)", "Max (g)", "Mean F%", "N Inbred"),
align = c("l", rep("r", 7))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
} else {
cat("EBV data not available.\n")
}| Line | N | Mean EBV (g) | SD (g) | Min (g) | Max (g) | Mean F% | N Inbred |
|---|---|---|---|---|---|---|---|
| BIL-20 | 685 | 35.7 | 67.7 | -217.2 | 207.8 | 1.37 | 75 |
| LIL-20 | 798 | 71.1 | 111.9 | -307.5 | 315.8 | 0.64 | 26 |
| NHIL-20 | 641 | 175.6 | 298.0 | -668.4 | 842.7 | 1.33 | 59 |
| WIL-20 | 600 | 57.4 | 95.8 | -243.0 | 293.7 | 1.17 | 46 |
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
ggplot(ebv_all, aes(x = EBV, fill = Line)) +
geom_histogram(bins = 30, alpha = 0.8, color = "white") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
facet_wrap(~Line, scales = "free_y") +
scale_fill_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "EBV Distribution for Body Weight at 16 Weeks",
subtitle = "Red dashed line indicates EBV = 0",
x = "EBV (g)", y = "Count") +
theme(legend.position = "none")
}all_pheno_with_id <- bind_rows(
lil_data %>% select(Line, ID, Generation, BW_16wk),
bil_data %>% select(Line, ID, Generation, BW_16wk),
nhil_data %>% select(Line, ID, Generation, BW_16wk),
wil_data %>% select(Line, ID, Generation, BW_16wk)
)
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
ebv_with_gen <- ebv_all %>%
mutate(ID_original = as.character(ID_original)) %>%
left_join(all_pheno_with_id %>% select(Line, ID, Generation),
by = c("Line" = "Line", "ID_original" = "ID")) %>%
filter(!is.na(Generation))
genetic_trend <- ebv_with_gen %>%
group_by(Line, Generation) %>%
summarise(
N = n(),
Mean_EBV = mean(EBV, na.rm = TRUE),
SE_EBV = sd(EBV, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
) %>%
mutate(Generation = paste0("G", Generation))
}if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
n_wide <- genetic_trend %>%
select(Line, Generation, N) %>%
pivot_wider(names_from = Generation, values_from = N, names_prefix = "N_")
ebv_wide <- genetic_trend %>%
select(Line, Generation, Mean_EBV) %>%
mutate(Mean_EBV = round(Mean_EBV, 1)) %>%
pivot_wider(names_from = Generation, values_from = Mean_EBV, names_prefix = "Mean_EBV_")
genetic_trend_wide <- n_wide %>%
left_join(ebv_wide, by = "Line")
gen_cols <- sort(unique(genetic_trend$Generation))
col_order <- c("Line")
for (g in gen_cols) {
col_order <- c(col_order, paste0("N_", g), paste0("Mean_EBV_", g))
}
col_order <- col_order[col_order %in% colnames(genetic_trend_wide)]
genetic_trend_wide <- genetic_trend_wide %>% select(all_of(col_order))
genetic_trend_wide %>%
kable(align = c("l", rep("r", ncol(genetic_trend_wide) - 1))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#805ad5", color = "white")
}| Line | N_G0 | Mean_EBV_G0 | N_G1 | Mean_EBV_G1 | N_G2 | Mean_EBV_G2 |
|---|---|---|---|---|---|---|
| BIL-20 | 269 | 0.0 | 124 | 48.2 | 292 | 63.4 |
| LIL-20 | 291 | -2.7 | 134 | 68.4 | 362 | 131.4 |
| NHIL-20 | 333 | -3.0 | 93 | 278.5 | 215 | 407.7 |
| WIL-20 | 237 | -0.9 | 87 | 62.4 | 275 | 105.5 |
if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_line(linewidth = 1.2) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = Mean_EBV - SE_EBV, ymax = Mean_EBV + SE_EBV),
width = 0.1, linewidth = 0.8) +
geom_text(aes(label = round(Mean_EBV, 1)), vjust = -1.5, size = 3.5, show.legend = FALSE) +
scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Genetic Trend: Mean EBV for Body Weight at 16 Weeks by Generation",
subtitle = "Error bars represent ยฑ 1 standard error | Dashed line indicates EBV = 0",
x = "Generation", y = "Mean EBV (g)", color = "Line") +
theme(legend.position = "bottom") +
scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)))
}if (exists("genetic_trend") && nrow(genetic_trend) > 0) {
library(gridExtra)
p1 <- ggplot(pheno_trend, aes(x = Generation, y = Mean_BW16, color = Line, group = Line)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Phenotypic Trend",
x = "Generation", y = "Mean BW_16wk (g)") +
theme(legend.position = "none")
p2 <- ggplot(genetic_trend, aes(x = Generation, y = Mean_EBV, color = Line, group = Line)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
scale_color_manual(values = c("LIL-20" = "#3182ce", "BIL-20" = "#38a169",
"NHIL-20" = "#805ad5", "WIL-20" = "#ed8936")) +
labs(title = "Genetic Trend",
x = "Generation", y = "Mean EBV (g)") +
theme(legend.position = "bottom")
grid.arrange(p1, p2, ncol = 2,
top = grid::textGrob("Phenotypic vs Genetic Trend Comparison",
gp = grid::gpar(fontsize = 14, fontface = "bold")))
}The genetic trend shows the change in mean Estimated Breeding Value (EBV) across generations, representing the genetic progress achieved through selection.
Key points:
The table below identifies the top 10 animals per line based on their Estimated Breeding Values (EBVs) for body weight at 16 weeks. These individuals represent the genetic elite of each line and are prime candidates for retention as breeding sires and dams for the next generation.
Selection Question: Which individuals should be kept as breeding animals?
The animals listed below have the highest genetic merit for body weight. Selecting these individuals as parents of the next generation will maximize genetic progress while maintaining genetic diversity.
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
top_animals <- ebv_all %>%
group_by(Line) %>%
arrange(desc(EBV)) %>%
slice_head(n = 10) %>%
mutate(Rank = row_number()) %>%
ungroup() %>%
select(Line, Rank, ID_original, EBV, Accuracy, Inbreeding_pct) %>%
mutate(
EBV = round(EBV, 1),
Accuracy = ifelse(is.na(Accuracy), "-", round(Accuracy, 2)),
Inbreeding_pct = ifelse(is.na(Inbreeding_pct), "-", round(Inbreeding_pct, 2))
)
top_animals %>%
kable(col.names = c("Line", "Rank", "Animal ID", "EBV (g)", "Accuracy", "Inbreeding %"),
align = c("l", "c", "l", "r", "r", "r")) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
row_spec(0, bold = TRUE, background = "#276749", color = "white") %>%
row_spec(which(top_animals$Rank == 1), bold = TRUE, background = "#c6f6d5")
}| Line | Rank | Animal ID | EBV (g) | Accuracy | Inbreeding % |
|---|---|---|---|---|---|
| BIL-20 | 1 | C-7908 | 207.8 | 0.71 | 12.5 |
| BIL-20 | 2 | 1222 | 204.3 | 0.54 | 0.0 |
| BIL-20 | 3 | C-7918 | 192.8 | 0.65 | 0.0 |
| BIL-20 | 4 | B-3407 | 189.9 | 0.78 | 0.0 |
| BIL-20 | 5 | C-7943 | 188.5 | 0.66 | 0.0 |
| BIL-20 | 6 | C-3263 | 184.5 | 0.66 | 0.0 |
| BIL-20 | 7 | 1426 | 183.2 | 0.54 | 0.0 |
| BIL-20 | 8 | C-7075 | 178.4 | 0.71 | 12.5 |
| BIL-20 | 9 | 1287 | 174.6 | 0.54 | 0.0 |
| BIL-20 | 10 | B-3091 | 169.2 | 0.68 | 0.0 |
| LIL-20 | 1 | C-7742 | 315.8 | 0.80 | 0.0 |
| LIL-20 | 2 | C-7562 | 299.9 | 0.81 | 0.0 |
| LIL-20 | 3 | C-7660 | 297.5 | 0.81 | 0.0 |
| LIL-20 | 4 | C-7565 | 297.0 | 0.81 | 0.0 |
| LIL-20 | 5 | C-8084 | 292.5 | 0.80 | 0.0 |
| LIL-20 | 6 | C-8158 | 285.6 | 0.81 | 0.0 |
| LIL-20 | 7 | C-8142 | 283.4 | 0.80 | 0.0 |
| LIL-20 | 8 | C-8147 | 281.1 | 0.81 | 0.0 |
| LIL-20 | 9 | C-7719 | 277.3 | 0.80 | 0.0 |
| LIL-20 | 10 | B-3323 | 276.3 | 0.91 | 0.0 |
| NHIL-20 | 1 | C-7171 | 842.7 | 0.90 | 25.0 |
| NHIL-20 | 2 | C-7136 | 842.5 | 0.89 | 12.5 |
| NHIL-20 | 3 | C-7178 | 805.5 | 0.87 | 0.0 |
| NHIL-20 | 4 | C-7180 | 784.1 | 0.87 | 0.0 |
| NHIL-20 | 5 | C-7981 | 774.7 | 0.89 | 12.5 |
| NHIL-20 | 6 | C-7137 | 756.8 | 0.89 | 12.5 |
| NHIL-20 | 7 | C-7128 | 727.8 | 0.87 | 0.0 |
| NHIL-20 | 8 | C-7181 | 724.8 | 0.87 | 0.0 |
| NHIL-20 | 9 | 2384 | 717.4 | 0.87 | 0.0 |
| NHIL-20 | 10 | C-7966 | 693.7 | 0.87 | 0.0 |
| WIL-20 | 1 | C-7498 | 293.7 | 0.71 | 0.0 |
| WIL-20 | 2 | B-3346 | 288.2 | 0.78 | 0.0 |
| WIL-20 | 3 | C-8280 | 287.2 | 0.72 | 0.0 |
| WIL-20 | 4 | C-7497 | 277.2 | 0.71 | 0.0 |
| WIL-20 | 5 | C-7435 | 276.1 | 0.72 | 0.0 |
| WIL-20 | 6 | B-3236 | 269.1 | 0.86 | 0.0 |
| WIL-20 | 7 | C-3153 | 265.1 | 0.71 | 0.0 |
| WIL-20 | 8 | C-7510 | 263.7 | 0.72 | 0.0 |
| WIL-20 | 9 | B-3368 | 262.6 | 0.78 | 0.0 |
| WIL-20 | 10 | C-8283 | 261.2 | 0.72 | 0.0 |
if (!is.null(ebv_all) && nrow(ebv_all) > 0) {
top_summary <- ebv_all %>%
group_by(Line) %>%
arrange(desc(EBV)) %>%
slice_head(n = 10) %>%
summarise(
Top10_Mean_EBV = round(mean(EBV), 1),
Top10_Min_EBV = round(min(EBV), 1),
Top10_Max_EBV = round(max(EBV), 1),
Selection_Differential = round(mean(EBV) - 0, 1),
.groups = "drop"
)
cat("\n")
top_summary %>%
kable(col.names = c("Line", "Mean EBV (Top 10)", "Min EBV", "Max EBV", "Selection Differential (g)"),
align = c("l", rep("r", 4))) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(0, bold = TRUE, background = "#2d3748", color = "white")
}| Line | Mean EBV (Top 10) | Min EBV | Max EBV | Selection Differential (g) |
|---|---|---|---|---|
| BIL-20 | 187.3 | 169.2 | 207.8 | 187.3 |
| LIL-20 | 290.6 | 276.3 | 315.8 | 290.6 |
| NHIL-20 | 767.0 | 693.7 | 842.7 | 767.0 |
| WIL-20 | 274.4 | 261.2 | 293.7 | 274.4 |
Genetic analysis of egg production traits (age at first egg, egg number at 45 weeks, egg weight) was not conducted due to insufficient data for reliable variance component estimation.
pheno_stats %>%
select(Line, Egg_N, Egg_Pct_Females) %>%
mutate(Status = ifelse(Egg_N < 100, "Insufficient", "Available")) %>%
kable(col.names = c("Line", "Egg Records (N)", "% of Females", "Status"),
align = c("l", rep("r", 2), "c")) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(0, bold = TRUE, background = "#e53e3e", color = "white")| Line | Egg Records (N) | % of Females | Status |
|---|---|---|---|
| LIL-20 | 46 | 9.7 | Insufficient |
| BIL-20 | 43 | 12.3 | Insufficient |
| NHIL-20 | 31 | 8.6 | Insufficient |
| WIL-20 | 28 | 9.7 | Insufficient |
Sample size limitations: With fewer than 100 records per line in most cases, variance component estimation would produce unreliable and imprecise heritability estimates.
Pedigree connectivity: Limited number of dam families with egg records reduces the ability to separate genetic from environmental effects.
Generational coverage: Egg data is primarily available from G1 females, with G2 recording still in progress.
Standard error concerns: Expected standard errors would exceed acceptable thresholds for publication-quality estimates.
The following limitations should be considered when interpreting the results of this evaluation:
Small sample sizes: Population sizes range from 662 to 1215 animals per line, which limits statistical power and may affect variance component estimates.
Single trait analysis: The current evaluation focuses only on body weight at 16 weeks. Other economically important traits (egg production, survival, feed efficiency) require additional data collection before they can be included in a comprehensive selection index.
Limited pedigree depth: With only three generations (G0, G1, G2), the pedigree structure provides limited information for genetic evaluation. G0 founders have unknown parents by design.
Limited relatedness: The number of unique sires and dams is relatively small, creating unbalanced family structures and potentially affecting heritability estimates.
No genomic data: Pedigree-based relationships may not fully capture actual genetic relationships, and genomic selection is not yet possible.
| Measure | Finding |
|---|---|
| Total birds evaluated | 3,790 across 4 lines |
| Generations included | G0, G1, and G2 |
| Survival rates | All lines > 92% |
| Mean BW at 16 weeks | 981g to 1354g |
| Heritability range | 0.298 to 0.76 |
| Genetic trend | Positive for most lines from G0 to G2 |
The current breeding program focuses on a single selection trait: body weight at 16 weeks (BW_16wk). This trait was chosen as the primary selection criterion due to its economic importance and ease of measurement. While egg production traits are being recorded, they are not yet incorporated into the breeding objective due to insufficient data for reliable genetic evaluation.
Continue data collection: The current analysis is based on relatively small sample sizes with a single trait. More phenotypic records across generations are needed to improve precision of genetic parameter estimates.
Expand trait recording: Systematic recording of additional economically important traits is essential:
Establish biannual evaluation schedule: Conduct genetic evaluations twice per year to ensure timely selection decisions and continuous progress monitoring.
Multi-trait selection index: As sufficient data becomes available for multiple traits, develop a comprehensive selection index that balances growth and reproductive traits.
Consider genomic tools: Implement SNP genotyping to improve relationship estimation and enable genomic selection.
Standardize ID systems: Ensure consistent animal identification across data collection, pedigree recording, and genetic evaluation to avoid ID matching issues.
This analytical framework should be extended to poultry datasets from other project countries to strengthen regional genetic improvement:
| Region | Countries | Priority | Expected Impact |
|---|---|---|---|
| East Africa | Tanzania | High | Harmonize evaluation methods, enable germplasm exchange |
| West Africa | Nigeria | Medium | Regional expansion, GรE assessment |
| Asia | Vietnam, other project countries | Medium | Cross-regional comparison, diverse environment testing |
Benefits of Multi-Country Extension:
As this breeding program matures and more comprehensive data becomes available, the genetic evaluation framework can provide value for various stakeholders:
For Internal Program Management:
For Future Partnerships:
For Capacity Building: