library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(kableExtra)
# ── Custom ggplot2 theme matching the report palette (colorblind-friendly) ──
theme_report <- theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 15, color = "#2166AC"),
plot.subtitle = element_text(color = "#6c757d", size = 11),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_line(color = "#f0f0f0", linewidth = 0.4),
panel.grid.minor = element_blank(),
axis.title = element_text(size = 11, color = "#495057", face = "bold"),
axis.text = element_text(size = 10, color = "#6c757d"),
legend.position = "top",
legend.title = element_text(face = "bold", size = 10),
legend.text = element_text(size = 10),
strip.text = element_text(face = "bold", size = 11, color = "#2166AC"),
strip.background = element_rect(fill = "#D1E5F0", color = NA)
)
theme_set(theme_report)
# ── Colorblind-friendly palettes (RdBu / Blue-Orange diverging) ──
pal_primary <- c("#2166AC", "#4393C3", "#67A9CF", "#92C5DE", "#D1E5F0")
pal_accent <- c("#D6604D", "#F4A582", "#FDDBC7", "#4393C3", "#2166AC")
pal_sex <- c("F" = "#4393C3", "M" = "#D6604D")
working_dir <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/Nigeria_poultry"
setwd(working_dir)
file_path <- "AMO NIGERIA DATA.xlsx"
dat <- read_excel(file_path, sheet = 1)
dat <- dat %>%
select(-any_of(c("INDEX", "REMARK", "FILTER_NUM", "SPOT_NUMB",
"TAGNO", "SAMPLE_UNI"))) %>%
mutate(
SIRENO = as.character(SIRENO),
DAMNO = as.character(DAMNO),
GSIRENO = as.character(GSIRENO),
WINGNO = as.character(WINGNO),
SEX = as.factor(SEX),
GCODE = as.factor(GCODE)
)
This report provides a data-driven assessment of the AMO Nigeria poultry farm dataset for genetic evaluation and breeding strategy development.
n_animals <- nrow(dat)
n_females <- sum(dat$SEX == "F", na.rm = TRUE)
n_males <- sum(dat$SEX == "M", na.rm = TRUE)
n_sires <- length(unique(na.omit(dat$SIRENO)))
n_dams <- length(unique(na.omit(dat$DAMNO)))
n_gsires <- length(unique(na.omit(dat$GSIRENO)))
sires_in_data <- sum(unique(na.omit(dat$SIRENO)) %in% dat$WINGNO)
dams_in_data <- sum(unique(na.omit(dat$DAMNO)) %in% dat$WINGNO)
# Generation assignment
gen1_sires <- dat$WINGNO[dat$WINGNO %in% dat$SIRENO]
gen1_dams <- dat$WINGNO[dat$WINGNO %in% dat$DAMNO]
dat$generation <- ifelse(dat$WINGNO %in% c(gen1_sires, gen1_dams), "Gen1_Parent", "Gen2_Offspring")
n_gen1 <- sum(dat$generation == "Gen1_Parent")
n_gen2 <- sum(dat$generation == "Gen2_Offspring")
| Parameter | Value |
|---|---|
| Total birds | 520 |
| Females / Males | 500 / 20 |
| Unique sires | 31 |
| Unique dams | 173 |
| Unique grand-sires | 14 |
| Sires with own phenotype | 4 |
| Dams with own phenotype | 20 |
| Generation 1 (parents) | 24 |
| Generation 2 (offspring) | 496 |
| Generations spanned | ≥ 2 (overlapping) |
The dataset contains 520 records with 20 columns spanning identification, pedigree, and phenotypic variables.
missing_summary <- dat %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "N_Missing") %>%
mutate(Pct_Missing = round(N_Missing / nrow(dat) * 100, 2)) %>%
filter(N_Missing > 0) %>%
arrange(desc(N_Missing))
if (nrow(missing_summary) > 0) {
kable(missing_summary, caption = "Variables with Missing Values") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
cat("No missing values detected.")
}
| Variable | N_Missing | Pct_Missing |
|---|---|---|
| CAGENO | 23 | 4.42 |
| EW1 | 23 | 4.42 |
| EW2 | 23 | 4.42 |
| EW3 | 23 | 4.42 |
| EWT | 23 | 4.42 |
| BW1 | 23 | 4.42 |
| BW2 | 23 | 4.42 |
| BW3 | 23 | 4.42 |
| EP | 23 | 4.42 |
| LP | 23 | 4.42 |
| AFEGG | 23 | 4.42 |
| PP | 23 | 4.42 |
| GSIRENO | 3 | 0.58 |
| SIRENO | 3 | 0.58 |
| DAMNO | 3 | 0.58 |
sex_tab <- dat %>%
count(SEX, name = "Count") %>%
mutate(Percent = round(Count / sum(Count) * 100, 2))
kable(sex_tab, caption = "Sex Distribution") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| SEX | Count | Percent |
|---|---|---|
| F | 500 | 96.15 |
| M | 20 | 3.85 |
gcode_tab <- dat %>%
count(GCODE, name = "Count") %>%
mutate(Percent = round(Count / sum(Count) * 100, 2)) %>%
arrange(desc(Count))
kable(gcode_tab, caption = "Genetic Code (GCODE) Distribution") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| GCODE | Count | Percent |
|---|---|---|
| K10 | 520 | 100 |
trait_cols <- c("EW1", "EW2", "EW3", "EWT", "BW1", "BW2", "BW3",
"EP", "LP", "TE", "AFEGG", "PP")
trait_cols <- trait_cols[trait_cols %in% names(dat)]
desc_stats <- dat %>%
select(all_of(trait_cols)) %>%
pivot_longer(everything(), names_to = "Trait", values_to = "Value") %>%
filter(!is.na(Value)) %>%
group_by(Trait) %>%
summarise(
N = n(),
Mean = round(mean(Value), 2),
SD = round(sd(Value), 2),
`CV%` = round(sd(Value) / mean(Value) * 100, 2),
Min = round(min(Value), 2),
Q1 = round(quantile(Value, 0.25), 2),
Median = round(median(Value), 2),
Q3 = round(quantile(Value, 0.75), 2),
Max = round(max(Value), 2),
.groups = "drop"
) %>%
arrange(Trait)
kable(desc_stats, caption = "Descriptive Statistics for All Traits") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE) %>%
row_spec(0, bold = TRUE)
| Trait | N | Mean | SD | CV% | Min | Q1 | Median | Q3 | Max |
|---|---|---|---|---|---|---|---|---|---|
| AFEGG | 497 | 149.27 | 10.09 | 6.76 | 125.0 | 143.00 | 149.0 | 156.00 | 200.00 |
| BW1 | 497 | 1756.04 | 165.43 | 9.42 | 1193.0 | 1651.00 | 1763.0 | 1863.00 | 2168.00 |
| BW2 | 497 | 2462.32 | 260.55 | 10.58 | 1108.0 | 2293.00 | 2458.0 | 2638.00 | 3398.00 |
| BW3 | 497 | 2699.64 | 294.86 | 10.92 | 1385.0 | 2533.00 | 2703.0 | 2868.00 | 3508.00 |
| EP | 497 | 107.16 | 28.20 | 26.32 | -3.0 | 95.00 | 115.0 | 127.00 | 153.00 |
| EW1 | 497 | 43.51 | 2.87 | 6.59 | 32.2 | 41.80 | 43.6 | 45.40 | 56.90 |
| EW2 | 497 | 55.00 | 4.29 | 7.80 | 35.0 | 52.50 | 55.0 | 57.80 | 69.00 |
| EW3 | 497 | 60.17 | 6.48 | 10.76 | 32.0 | 57.00 | 60.0 | 63.10 | 108.00 |
| EWT | 497 | 52.80 | 3.40 | 6.44 | 39.9 | 50.80 | 52.9 | 54.90 | 68.80 |
| LP | 497 | 53.06 | 36.72 | 69.20 | -3.0 | 14.00 | 66.0 | 86.00 | 108.00 |
| PP | 497 | 61.25 | 22.13 | 36.14 | 0.7 | 44.60 | 69.2 | 78.91 | 93.14 |
| TE | 520 | 153.14 | 66.19 | 43.22 | -5.0 | 105.75 | 178.5 | 205.00 | 252.00 |
egg_cols <- c("EP", "LP", "TE", "AFEGG")
egg_cols <- egg_cols[egg_cols %in% names(dat)]
# Full trait name labels
egg_labels <- c(
"EP" = "Early Egg Production",
"LP" = "Late Egg Production",
"TE" = "Total Egg Number",
"AFEGG" = "Age at First Egg (days)"
)
dat %>%
select(all_of(egg_cols)) %>%
pivot_longer(everything(), names_to = "Trait", values_to = "Value") %>%
filter(!is.na(Value)) %>%
mutate(Trait = factor(Trait, levels = names(egg_labels), labels = egg_labels)) %>%
ggplot(aes(x = Value)) +
geom_histogram(aes(y = after_stat(density)), fill = "#4393C3", color = "white",
alpha = 0.85, bins = 30) +
geom_density(color = "#D6604D", linewidth = 1, alpha = 0.7) +
facet_wrap(~Trait, scales = "free") +
labs(title = "Distribution of Egg Production Traits",
subtitle = "AMO Nigeria Poultry Data — Histogram with density overlay",
x = "Value", y = "Density")
Distribution of egg production traits.
Key observations:
offspring_per_sire <- dat %>%
filter(!is.na(SIRENO)) %>%
count(SIRENO, name = "N_Offspring") %>%
arrange(desc(N_Offspring))
sire_summary <- offspring_per_sire %>%
summarise(
N_Sires = n(),
Mean_Offspring = round(mean(N_Offspring), 2),
SD_Offspring = round(sd(N_Offspring), 2),
Min = min(N_Offspring),
Median = median(N_Offspring),
Max = max(N_Offspring),
Total_Offspring = sum(N_Offspring)
)
kable(sire_summary, caption = "Summary: Offspring per Sire") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| N_Sires | Mean_Offspring | SD_Offspring | Min | Median | Max | Total_Offspring |
|---|---|---|---|---|---|---|
| 31 | 16.68 | 12.63 | 1 | 17 | 40 | 517 |
ggplot(offspring_per_sire, aes(x = N_Offspring)) +
geom_histogram(binwidth = 1, fill = "#4393C3", color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(N_Offspring)), color = "#D6604D",
linewidth = 1.2, linetype = "dashed") +
annotate("text", x = mean(offspring_per_sire$N_Offspring) + 0.5,
y = Inf, vjust = 2, hjust = -0.1,
label = paste("Mean =", round(mean(offspring_per_sire$N_Offspring), 1)),
color = "#D6604D", fontface = "bold", size = 4) +
labs(title = "Distribution of Offspring per Sire",
subtitle = paste("N sires =", nrow(offspring_per_sire)),
x = "Number of Offspring", y = "Number of Sires")
Distribution of offspring per sire.
kable(head(offspring_per_sire, 10), caption = "Top 10 Sires by Offspring Count") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| SIRENO | N_Offspring |
|---|---|
| 3659 | 40 |
| 2538 | 37 |
| 4043 | 35 |
| 4776 | 35 |
| 1661 | 31 |
| 1211 | 27 |
| 1662 | 27 |
| 1197 | 25 |
| 3620 | 25 |
| 2853 | 24 |
offspring_per_dam <- dat %>%
filter(!is.na(DAMNO)) %>%
count(DAMNO, name = "N_Offspring") %>%
arrange(desc(N_Offspring))
dam_summary <- offspring_per_dam %>%
summarise(
N_Dams = n(),
Mean_Offspring = round(mean(N_Offspring), 2),
SD_Offspring = round(sd(N_Offspring), 2),
Min = min(N_Offspring),
Median = median(N_Offspring),
Max = max(N_Offspring),
Total_Offspring = sum(N_Offspring)
)
kable(dam_summary, caption = "Summary: Offspring per Dam") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| N_Dams | Mean_Offspring | SD_Offspring | Min | Median | Max | Total_Offspring |
|---|---|---|---|---|---|---|
| 173 | 2.99 | 1.79 | 1 | 3 | 8 | 517 |
ggplot(offspring_per_dam, aes(x = N_Offspring)) +
geom_histogram(binwidth = 1, fill = "#D6604D", color = "white", alpha = 0.85) +
geom_vline(aes(xintercept = mean(N_Offspring)), color = "#2166AC",
linewidth = 1.2, linetype = "dashed") +
annotate("text", x = mean(offspring_per_dam$N_Offspring) + 0.3,
y = Inf, vjust = 2, hjust = -0.1,
label = paste("Mean =", round(mean(offspring_per_dam$N_Offspring), 1)),
color = "#2166AC", fontface = "bold", size = 4) +
labs(title = "Distribution of Offspring per Dam",
subtitle = paste("N dams =", nrow(offspring_per_dam)),
x = "Number of Offspring", y = "Number of Dams")
Distribution of offspring per dam.
sire_dam_combo <- dat %>%
filter(!is.na(SIRENO) & !is.na(DAMNO)) %>%
count(SIRENO, DAMNO, name = "N_Offspring") %>%
arrange(desc(N_Offspring))
sire_dam_summary <- sire_dam_combo %>%
summarise(
N_Matings = n(),
Mean_Offspring = round(mean(N_Offspring), 2),
SD_Offspring = round(sd(N_Offspring), 2),
Min = min(N_Offspring),
Median = median(N_Offspring),
Max = max(N_Offspring),
Total_Offspring = sum(N_Offspring)
)
kable(sire_dam_summary, caption = "Summary: Offspring per Sire-Dam Mating") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| N_Matings | Mean_Offspring | SD_Offspring | Min | Median | Max | Total_Offspring |
|---|---|---|---|---|---|---|
| 173 | 2.99 | 1.79 | 1 | 3 | 8 | 517 |
# Full-sibs
full_sib_families <- sire_dam_combo %>% filter(N_Offspring >= 2)
n_full_sib_families <- nrow(full_sib_families)
n_full_sib_pairs <- sum(choose(full_sib_families$N_Offspring, 2))
# Full-sib family size distribution
fs_size_dist <- full_sib_families %>%
count(N_Offspring, name = "N_Families") %>%
arrange(N_Offspring)
# Paternal half-sibs
paternal_hs <- dat %>%
filter(!is.na(SIRENO) & !is.na(DAMNO)) %>%
group_by(SIRENO) %>%
summarise(
N_Total = n(),
N_Dams = n_distinct(DAMNO),
Total_Pairs = choose(N_Total, 2),
.groups = "drop"
)
fs_per_sire <- dat %>%
filter(!is.na(SIRENO) & !is.na(DAMNO)) %>%
count(SIRENO, DAMNO, name = "n") %>%
group_by(SIRENO) %>%
summarise(FS_Pairs = sum(choose(n, 2)), .groups = "drop")
paternal_hs <- paternal_hs %>%
left_join(fs_per_sire, by = "SIRENO") %>%
mutate(HS_Pairs = Total_Pairs - FS_Pairs)
n_paternal_hs_pairs <- sum(paternal_hs$HS_Pairs)
n_sires_multi_dam <- sum(paternal_hs$N_Dams > 1)
# Maternal half-sibs
maternal_hs <- dat %>%
filter(!is.na(SIRENO) & !is.na(DAMNO)) %>%
group_by(DAMNO) %>%
summarise(
N_Total = n(),
N_Sires = n_distinct(SIRENO),
Total_Pairs = choose(N_Total, 2),
.groups = "drop"
)
fs_per_dam <- dat %>%
filter(!is.na(SIRENO) & !is.na(DAMNO)) %>%
count(SIRENO, DAMNO, name = "n") %>%
group_by(DAMNO) %>%
summarise(FS_Pairs = sum(choose(n, 2)), .groups = "drop")
maternal_hs <- maternal_hs %>%
left_join(fs_per_dam, by = "DAMNO") %>%
mutate(HS_Pairs = Total_Pairs - FS_Pairs)
n_maternal_hs_pairs <- sum(maternal_hs$HS_Pairs)
n_dams_multi_sire <- sum(maternal_hs$N_Sires > 1)
rel_summary <- data.frame(
Relationship = c("Full-sib families (sire-dam combos ≥ 2 offspring)",
"Full-sib pairs",
"Paternal half-sib pairs (same sire, different dam)",
"Maternal half-sib pairs (same dam, different sire)",
"Total half-sib pairs",
"Sires mated to multiple dams",
"Dams mated to multiple sires"),
Count = c(n_full_sib_families,
n_full_sib_pairs,
n_paternal_hs_pairs,
n_maternal_hs_pairs,
n_paternal_hs_pairs + n_maternal_hs_pairs,
n_sires_multi_dam,
n_dams_multi_sire)
)
kable(rel_summary, caption = "Pedigree Relationship Summary",
col.names = c("Relationship", "N")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| Relationship | N |
|---|---|
| Full-sib families (sire-dam combos ≥ 2 offspring) | 124 |
| Full-sib pairs | 790 |
| Paternal half-sib pairs (same sire, different dam) | 5656 |
| Maternal half-sib pairs (same dam, different sire) | 0 |
| Total half-sib pairs | 5656 |
| Sires mated to multiple dams | 23 |
| Dams mated to multiple sires | 0 |
if (nrow(fs_size_dist) > 0) {
ggplot(fs_size_dist, aes(x = factor(N_Offspring), y = N_Families)) +
geom_col(fill = "#4393C3", alpha = 0.85, width = 0.7) +
geom_text(aes(label = N_Families), vjust = -0.5, color = "#2166AC",
fontface = "bold", size = 3.5) +
labs(title = "Full-Sib Family Size Distribution",
subtitle = "Number of families by offspring count per sire-dam combination",
x = "Family Size (offspring per sire-dam combo)",
y = "Number of Families")
}
Full-sib family size distribution.
The hierarchical mating design (sires mated to multiple dams) generates both full-sib and paternal half-sib families — essential for partitioning additive genetic variance (between-sire component) and common environmental/dominance effects (within-sire, between-dam component).
Cross-referencing WINGNO against SIRENO and DAMNO reveals that 4 sires and 20 dams are themselves phenotyped in this dataset, confirming at least two overlapping generations.
gen_summary <- dat %>%
group_by(generation) %>%
summarise(
N = n(),
Mean_TE = round(mean(TE, na.rm = TRUE), 1),
Mean_EP = round(mean(EP, na.rm = TRUE), 1),
Mean_AFEGG = round(mean(AFEGG, na.rm = TRUE), 1),
Mean_BW1 = round(mean(BW1, na.rm = TRUE), 1),
Mean_EWT = round(mean(EWT, na.rm = TRUE), 1),
.groups = "drop"
)
kable(gen_summary, caption = "Phenotypic Means by Generation",
col.names = c("Generation", "N", "TE", "EP", "AFEGG", "BW1", "EWT")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| Generation | N | TE | EP | AFEGG | BW1 | EWT |
|---|---|---|---|---|---|---|
| Gen1_Parent | 24 | 145.9 | 101.9 | 150.2 | 1747.3 | 53.5 |
| Gen2_Offspring | 496 | 153.5 | 107.4 | 149.2 | 1756.5 | 52.8 |
if (nrow(gen_summary) == 2) {
g1 <- gen_summary %>% filter(generation == "Gen1_Parent")
g2 <- gen_summary %>% filter(generation == "Gen2_Offspring")
trend <- data.frame(
Trait = c("TE (total eggs)", "EP (early production)", "AFEGG (days)", "BW1 (g)", "EWT (g)"),
Gen1 = c(g1$Mean_TE, g1$Mean_EP, g1$Mean_AFEGG, g1$Mean_BW1, g1$Mean_EWT),
Gen2 = c(g2$Mean_TE, g2$Mean_EP, g2$Mean_AFEGG, g2$Mean_BW1, g2$Mean_EWT)
) %>%
mutate(
Change = round(Gen2 - Gen1, 1),
`Change%` = round((Gen2 - Gen1) / Gen1 * 100, 1),
Direction = case_when(
Trait == "AFEGG (days)" & Change < 0 ~ "✅ Favorable ↓",
Trait == "AFEGG (days)" & Change >= 0 ~ "⚠️ Unfavorable ↑",
Change > 1 ~ "✅ Favorable ↑",
Change < -1 ~ "⚠️ Check ↓",
TRUE ~ "➡️ Stable →"
)
)
kable(trend, caption = "Phenotypic Trends: Gen1 (Parents) → Gen2 (Offspring)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
}
| Trait | Gen1 | Gen2 | Change | Change% | Direction |
|---|---|---|---|---|---|
| TE (total eggs) | 145.9 | 153.5 | 7.6 | 5.2 | ✅ Favorable ↑ | |
| EP (early production) | 101.9 | 107.4 | 5.5 | 5.4 | ✅ Favorable ↑ | |
| AFEGG (days) | 150.2 | 149.2 | -1.0 | -0.7 | ✅ Favorable ↓ | |
| BW1 (g) | 1747.3 | 1756.5 | 9.2 | 0.5 | ✅ Favorable ↑ | |
| EWT (g) | 53.5 | 52.8 | -0.7 | -1.3 | ➡️ Stable → |
The multi-generation structure enables parent–offspring regression for heritability estimation, genetic trend analysis, and cross-validation of prediction accuracy.
bw_cols <- c("BW1", "BW2", "BW3")
bw_cols <- bw_cols[bw_cols %in% names(dat)]
bw_by_sex <- dat %>%
filter(!is.na(SEX)) %>%
select(SEX, all_of(bw_cols)) %>%
pivot_longer(cols = all_of(bw_cols), names_to = "Period", values_to = "BodyWeight") %>%
filter(!is.na(BodyWeight)) %>%
group_by(SEX, Period) %>%
summarise(
N = n(),
Mean = round(mean(BodyWeight), 1),
SD = round(sd(BodyWeight), 1),
`CV%` = round(sd(BodyWeight) / mean(BodyWeight) * 100, 2),
Min = round(min(BodyWeight), 0),
Median = round(median(BodyWeight), 0),
Max = round(max(BodyWeight), 0),
.groups = "drop"
) %>%
arrange(Period, SEX)
kable(bw_by_sex, caption = "Body Weight Summary by Sex and Period") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE)
| SEX | Period | N | Mean | SD | CV% | Min | Median | Max |
|---|---|---|---|---|---|---|---|---|
| F | BW1 | 497 | 1756.0 | 165.4 | 9.42 | 1193 | 1763 | 2168 |
| F | BW2 | 497 | 2462.3 | 260.5 | 10.58 | 1108 | 2458 | 3398 |
| F | BW3 | 497 | 2699.6 | 294.9 | 10.92 | 1385 | 2703 | 3508 |
dat %>%
filter(!is.na(SEX)) %>%
select(SEX, all_of(bw_cols)) %>%
pivot_longer(cols = all_of(bw_cols), names_to = "Period", values_to = "BodyWeight") %>%
filter(!is.na(BodyWeight)) %>%
ggplot(aes(x = Period, y = BodyWeight, fill = SEX)) +
geom_boxplot(alpha = 0.8, outlier.shape = 21, outlier.size = 2, width = 0.6) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
color = "#D6604D", position = position_dodge(width = 0.6)) +
labs(title = "Body Weight by Sex and Period",
subtitle = "AMO Nigeria Poultry Data — diamonds show group means",
x = "Body Weight Period", y = "Body Weight (g)", fill = "Sex") +
scale_fill_manual(values = pal_sex)
Body weight distribution across measurement periods by sex.
⚠️ Note: Body weight records are available only for females (n=497). The 20 males lack BW data. Sire EBVs for BW can still be estimated from daughter performance.
cor_data <- dat %>% select(all_of(trait_cols)) %>% drop_na()
cor_mat <- round(cor(cor_data), 3)
kable(cor_mat, caption = paste0("Phenotypic Correlations (n = ", nrow(cor_data), " complete cases)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = TRUE, font_size = 11)
| EW1 | EW2 | EW3 | EWT | BW1 | BW2 | BW3 | EP | LP | TE | AFEGG | PP | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| EW1 | 1.000 | 0.310 | 0.116 | 0.487 | -0.076 | 0.092 | 0.013 | -0.103 | 0.053 | -0.017 | 0.271 | 0.010 |
| EW2 | 0.310 | 1.000 | 0.426 | 0.770 | 0.001 | 0.081 | 0.014 | -0.064 | 0.029 | -0.013 | 0.192 | 0.010 |
| EW3 | 0.116 | 0.426 | 1.000 | 0.846 | 0.049 | 0.051 | 0.011 | -0.029 | 0.046 | 0.014 | 0.067 | 0.023 |
| EWT | 0.487 | 0.770 | 0.846 | 1.000 | 0.012 | 0.090 | 0.019 | -0.076 | 0.055 | -0.002 | 0.196 | 0.018 |
| BW1 | -0.076 | 0.001 | 0.049 | 0.012 | 1.000 | 0.491 | 0.332 | 0.128 | 0.040 | 0.086 | -0.400 | 0.042 |
| BW2 | 0.092 | 0.081 | 0.051 | 0.090 | 0.491 | 1.000 | 0.609 | -0.069 | 0.030 | -0.014 | 0.008 | -0.016 |
| BW3 | 0.013 | 0.014 | 0.011 | 0.019 | 0.332 | 0.609 | 1.000 | -0.094 | -0.122 | -0.122 | -0.033 | -0.128 |
| EP | -0.103 | -0.064 | -0.029 | -0.076 | 0.128 | -0.069 | -0.094 | 1.000 | 0.629 | 0.874 | -0.353 | 0.849 |
| LP | 0.053 | 0.029 | 0.046 | 0.055 | 0.040 | 0.030 | -0.122 | 0.629 | 1.000 | 0.928 | 0.000 | 0.936 |
| TE | -0.017 | -0.013 | 0.014 | -0.002 | 0.086 | -0.014 | -0.122 | 0.874 | 0.928 | 1.000 | -0.169 | 0.993 |
| AFEGG | 0.271 | 0.192 | 0.067 | 0.196 | -0.400 | 0.008 | -0.033 | -0.353 | 0.000 | -0.169 | 1.000 | -0.070 |
| PP | 0.010 | 0.010 | 0.023 | 0.018 | 0.042 | -0.016 | -0.128 | 0.849 | 0.936 | 0.993 | -0.070 | 1.000 |
# Heatmap visualization
cor_long <- as.data.frame(as.table(cor_mat)) %>%
rename(Trait1 = Var1, Trait2 = Var2, r = Freq)
ggplot(cor_long, aes(x = Trait1, y = Trait2, fill = r)) +
geom_tile(color = "white", linewidth = 0.5) +
geom_text(aes(label = sprintf("%.2f", r)),
size = 3, color = ifelse(abs(cor_long$r) > 0.5, "white", "#333333")) +
scale_fill_gradient2(low = "#D6604D", mid = "white", high = "#2166AC",
midpoint = 0, limits = c(-1, 1),
name = "Correlation") +
labs(title = "Phenotypic Correlation Matrix",
subtitle = "AMO Nigeria Poultry — All measured traits") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_blank(),
panel.grid = element_blank())
Phenotypic correlation heatmap for all traits.
Critical correlations for breeding:
key_corr <- data.frame(
`Trait Pair` = c("EP ↔ TE", "LP ↔ TE", "BW1 ↔ AFEGG",
"BW2 ↔ BW3", "BW1 ↔ BW2", "EW1 ↔ EP", "BW3 ↔ TE"),
r = c(cor_mat["EP","TE"], cor_mat["LP","TE"],
cor_mat["BW1","AFEGG"], cor_mat["BW2","BW3"], cor_mat["BW1","BW2"],
cor_mat["EW1","EP"], cor_mat["BW3","TE"]),
Implication = c(
"Early production strongly predicts total production — enables early selection",
"Late production dominates total — persistency matters",
"Heavier birds mature earlier — biologically meaningful",
"Strong repeatability across body weight periods",
"Moderate repeatability — early BW predicts later BW",
"Mild antagonism between egg size and egg number",
"Weak negative — classic layer BW vs production trade-off"
),
check.names = FALSE
)
kable(key_corr, caption = "Key Correlations for Breeding Strategy") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE)
| Trait Pair | r | Implication |
|---|---|---|
| EP ↔︎ TE | 0.874 | Early production strongly predicts total production — enables early selection |
| LP ↔︎ TE | 0.928 | Late production dominates total — persistency matters |
| BW1 ↔︎ AFEGG | -0.400 | Heavier birds mature earlier — biologically meaningful |
| BW2 ↔︎ BW3 | 0.609 | Strong repeatability across body weight periods |
| BW1 ↔︎ BW2 | 0.491 | Moderate repeatability — early BW predicts later BW |
| EW1 ↔︎ EP | -0.103 | Mild antagonism between egg size and egg number |
| BW3 ↔︎ TE | -0.122 | Weak negative — classic layer BW vs production trade-off |
| Requirement | Status | Assessment |
|---|---|---|
| Phenotypic records | ✅ | 12 traits, 497–520 records each |
| Pedigree (sire–dam) | ✅ | 3-generation depth (GSIRENO, SIRENO, DAMNO) |
| Multi-generation data | ✅ | ≥ 2 generations; 4 sires & 20 dams phenotyped |
| Parent–offspring links | ✅ | Enables genetic trend & parent–offspring regression |
| Family structure | ✅ | 124 full-sib families; 5656 paternal HS pairs |
| Fixed effects | ✅ | SEX, GCODE, CAGENO available |
| Genotype data | ✅ | SNP genotypes available — genomic selection feasible |
| Population size | ⚠️ | 520 birds — adequate for sire model; expand for higher accuracy |
| Male BW records | ❌ | 20 males lack body weight data |
| Analysis | Feasibility | Software |
|---|---|---|
| Sire model BLUP | ✅ Recommended | BLUPF90, ASReml |
| Animal model BLUP | ✅ Feasible | BLUPF90, ASReml |
| Variance components (REML) | ✅ Feasible | AIREMLF90, ASReml |
| Parent–offspring regression | ✅ Feasible | R |
| Genetic trend analysis | ✅ Feasible | R, BLUPF90 |
| Multi-trait animal model | ⚠️ Feasible (2–3 traits) | AIREMLF90 |
| Repeatability model (BW) | ✅ Feasible | BLUPF90 |
| Selection index optimization | ✅ Feasible | R (Smith-Hazel) |
| Single-step GBLUP (ssGBLUP) | ✅ Feasible | BLUPF90+, PREGSF90 |
| GBLUP / Bayesian methods | ✅ Feasible | JWAS, BayesR, rrBLUP |
| GWAS | ✅ Feasible | POSTGSF90, GEMMA |
Based on literature for similar tropical poultry populations. These should be estimated from this data using REML.
| Trait | Expected h² | Literature Range | Comment |
|---|---|---|---|
| Body weight (BW1–BW3) | 0.25–0.45 | 0.20–0.55 | Moderate-to-high; responds well to selection |
| Egg weight (EW1–EW3) | 0.40–0.60 | 0.35–0.65 | Among the most heritable poultry traits |
| Age at first egg (AFEGG) | 0.25–0.40 | 0.20–0.45 | Moderate heritability |
| Egg production (EP, TE) | 0.10–0.25 | 0.05–0.30 | Low-to-moderate; large environmental influence |
| Persistency (LP/PP) | 0.05–0.15 | 0.03–0.20 | Low; difficult to improve by selection alone |
Genotype data (SNP markers) is available for this population, making genomic selection feasible. Combined with the multi-generation pedigree and phenotypic records, the following genomic analyses can be implemented:
| Analysis | Method | Software |
|---|---|---|
| Single-step GBLUP | Combines A and G matrices via H-matrix | BLUPF90+ (PREGSF90) |
| GBLUP | G-matrix replaces A for genotyped animals | BLUPF90, rrBLUP, JWAS |
| Bayesian methods | BayesA, BayesB, BayesCπ, BayesR | JWAS (Julia), GenSel |
| GWAS | Single-SNP or window-based association | POSTGSF90, GEMMA |
| Cross-validation | Gen1 training, Gen2 validation | Custom R/Julia scripts |
Implementation steps:
Genetic gain per generation: ΔG = i × rTI × σA
# Use actual data for sigma_P
sigma_TE <- desc_stats %>% filter(Trait == "TE") %>% pull(SD)
mean_TE <- desc_stats %>% filter(Trait == "TE") %>% pull(Mean)
sigma_AFEGG <- desc_stats %>% filter(Trait == "AFEGG") %>% pull(SD)
mean_AFEGG <- desc_stats %>% filter(Trait == "AFEGG") %>% pull(Mean)
# Scenario calculations
scenarios <- data.frame(
Scenario = c(
"1: Pedigree BLUP — TE (top 10% females)",
"2: Pedigree BLUP — AFEGG (top 20%)",
"3: Multi-trait index (TE + EW + AFEGG)",
"4: Genomic selection — TE (ssGBLUP)"
),
h2 = c(0.15, 0.30, NA, 0.15),
i = c(1.76, 1.40, 1.40, 1.76),
r_TI = c(0.45, 0.55, 0.50, 0.60),
sigma_P = c(sigma_TE, sigma_AFEGG, sigma_TE, sigma_TE),
stringsAsFactors = FALSE
) %>%
mutate(
sigma_A = ifelse(!is.na(h2), round(sqrt(h2) * sigma_P, 2), NA),
Delta_G = ifelse(!is.na(sigma_A), round(i * r_TI * sigma_A, 1), NA)
)
# Scenario 3: approximate multi-trait gain (reduced from single-trait by ~30%)
scenarios$Delta_G[3] <- round(scenarios$Delta_G[1] * 0.70, 1)
scenarios$sigma_A[3] <- NA
gain_display <- scenarios %>%
select(Scenario, Delta_G) %>%
mutate(
Unit = c("eggs/gen", "days/gen", "eggs/gen (TE component)", "eggs/gen"),
Delta_G = c(
paste0("+", Delta_G[1]),
paste0(Delta_G[2]),
paste0("+", Delta_G[3]),
paste0("+", Delta_G[4])
)
)
kable(gain_display, caption = "Predicted Genetic Gain per Generation",
col.names = c("Scenario", "ΔG", "Unit")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
| Scenario | ΔG | Unit |
|---|---|---|
| 1: Pedigree BLUP — TE (top 10% females) | +20.3 | eggs/gen |
| 2: Pedigree BLUP — AFEGG (top 20%) | 4.3 | days/gen |
| 3: Multi-trait index (TE + EW + AFEGG) | +14.2 | eggs/gen (TE component) |
| 4: Genomic selection — TE (ssGBLUP) | +27.1 | eggs/gen |
if (nrow(gen_summary) == 2) {
observed_TE_change <- g2$Mean_TE - g1$Mean_TE
estimated_genetic <- round(observed_TE_change / 2, 1)
predicted_blup <- scenarios$Delta_G[1]
multiplier <- round(as.numeric(predicted_blup) / estimated_genetic, 0)
cat(paste0('\n\n<div class="success-box">\n',
'**📈 Observed vs. Predicted:** The observed phenotypic trend is +',
round(observed_TE_change, 1), ' eggs in TE (Gen1 → Gen2). ',
'Assuming ~50% is genetic, the realized gain is ~', estimated_genetic,
' eggs/generation. Formal BLUP selection (predicted: +', predicted_blup,
' eggs) could multiply progress by approximately **', multiplier, 'x**.\n',
'</div>\n\n'))
}
📈 Observed vs. Predicted: The observed phenotypic trend is +7.6 eggs in TE (Gen1 → Gen2). Assuming ~50% is genetic, the realized gain is ~3.8 eggs/generation. Formal BLUP selection (predicted: +20.3 eggs) could multiply progress by approximately 5x.
| Timeframe | Generations | Expected Cumulative Gain (TE) | Approach |
|---|---|---|---|
| Year 1–2 | 1 | +12–18 eggs | Pedigree BLUP / ssGBLUP |
| Year 3–5 | 2–3 | +25–45 eggs | ssGBLUP with expanded recording |
| Year 5–8 | 3–5 | +50–80 eggs | Mature genomic selection program |
Run genetic evaluation (pedigree + genomic): Fit single-step GBLUP using BLUPF90 suite — PREGSF90 for genotype QC and G-matrix, AIREMLF90 for variance components, BLUPF90+ for GEBVs. Start with single-trait models for TE, AFEGG, EW, and BW.
Estimate genetic parameters: Use REML (AIREMLF90) to estimate heritabilities and genetic correlations. Validate with parent–offspring regression using the 4 sire and 20 dam links.
Estimate genetic trends: Compare mean EBVs/GEBVs between Gen1 and Gen2 to quantify realized genetic progress.
Record male body weights: Begin recording BW for all 20 males to improve sire EBV accuracy.
Investigate zero-production records: Check whether zero values in EP, LP, TE represent true non-producers, early culling, or data entry errors.
Develop a multi-trait selection index: Construct a balanced index with economic weights for TE, EW, BW, and AFEGG using Smith-Hazel methodology.
Expand recording: Add feed intake (FCR), egg quality traits, and livability data.
Increase population size: Target ≥ 1,000 birds per generation for robust parameter estimation and genomic prediction accuracy.
Scale genomic selection: Genotype all selection candidates at hatch to maximize benefit of reduced generation interval.
Genetic dissemination: Develop a strategy for how improved genetics from AMO farm flow to multiplier and commercial flocks.