🎯 Project Context: The Tropical Dairy Genetic Gains (TDGG) project aims to establish sustainable genetic improvement programs for smallholder dairy systems in East Africa, funded by the Bill & Melinda Gates Foundation.
| Trait | Unit | Heritability | Importance |
|---|---|---|---|
| Age at First Calving (AFC) | months | 0.05 - 0.15 | Reproductive efficiency |
| Daily Milk Yield (DMY) | kg/day | 0.15 - 0.30 | Daily productivity |
| Total Lactation Yield (TLY) | kg/lactation | 0.20 - 0.35 | Total production |
| Calving Interval (CI) | days | 0.02 - 0.06 | Reproductive interval |
# ==============================================================================
# SET YOUR DATA PATH HERE
# ==============================================================================
data_path <- "C:/Users/SAlemu/OneDrive - CGIAR/Documents/KenyaData/New_Kenya_data/Version2/"
output_path <- paste0(data_path, "BLUPF90CL/")
if(!dir.exists(output_path)) dir.create(output_path, recursive = TRUE)
cat("═══════════════════════════════════════════════════════════════════════════\n")═══════════════════════════════════════════════════════════════════════════
LOADING KENYA DAIRY DATA FILES
═══════════════════════════════════════════════════════════════════════════
# Pedigree data
pedigree <- fread(paste0(data_path, "pedigree-extract.csv"),
na.strings = c("NULL", "null", "NA", "N/A", "", " "),
stringsAsFactors = FALSE)
# Lactation data
lactation <- fread(paste0(data_path, "lactation_calving_improved(in).csv"),
na.strings = c("NULL", "null", "NA", "N/A", "", " "),
stringsAsFactors = FALSE)
# Milking data
milking <- fread(paste0(data_path, "milking_data_improved.csv"),
na.strings = c("NULL", "null", "NA", "N/A", "", " "),
stringsAsFactors = FALSE)
cat("✓ Pedigree:", format(nrow(pedigree), big.mark = ","), "records\n")✓ Pedigree: 248,208 records
✓ Lactation: 88,352 records
✓ Milking: 1,048,575 records
# Show column names for debugging
cat("Pedigree columns:", paste(names(pedigree), collapse = ", "), "\n\n")Pedigree columns: country, region, district, ward, village, farmer_name, farm_id, org_id, organization_name, project, animal_id, tag_id, original_tag_id, sire_tag_id, sire_id, dam_tag_id, dam_id, sex, estimated_sex, reg_date, birthdate, main_breed, breed, longitude, latitude, warning, error, Extract_Datetime_UTC, Version
Lactation columns: farm_id, project, animal_id, tag_id, original_tag_id, animal_name, animal_registration_date, birthdate, sex, animal_sex, animal_type, animal_type_name, main_breed, main_breed_name, breed_composition, breed_proportion, secondary_breed_id, secondary_breed_name, secondary_breed_other, lactation_id, calving_date, parity, afc_months, afc_days, calving_interval_days, calving_interval_months, lactation_length_days, lactation_length_months, total_lactation_yield, testday_count, days_open, org_id, project_name, V34
Milking columns: farm_id, project, animal_id, tag_id, original_tag_id, animal_name, animal_registration_date, birthdate, sex, animal_sex, main_breed, main_breed_name, breed_composition, breed_proportion, secondary_breed_id, secondary_breed_name, secondary_breed_other, lactation_id, calving_date, parity, afc_months, afc_days, calving_age_months, calving_age_days, milk_event_id, milking_date, testday_no, dim, milk_morning, milk_evening, milk_midday, daily_yield, data_completeness_score, org_id, project_name, record_created_date
head(pedigree, 5) %>%
select(any_of(c("animal_id", "sire_id", "dam_id", "sex", "breed", "main_breed",
"region", "district", "farm_id", "country"))) %>%
styled_table(caption = "Sample pedigree records")| animal_id | sire_id | dam_id | sex | breed | main_breed | region | district | farm_id | country |
|---|---|---|---|---|---|---|---|---|---|
| 170285 | NA | NA | FEMALE | Ayrshire | 3 | Migori | Kuria West | 515160 | Kenya |
| 170298 | 0 | NA | FEMALE | Ayrshire | 3 | Kakamega | Likuyani | 515626 | Kenya |
| 170315 | NA | NA | FEMALE | Ayrshire | 3 | Kakamega | Lugari | 598316 | Kenya |
| 170328 | NA | NA | FEMALE | Ayrshire | 3 | Kakamega | Likuyani | 595412 | Kenya |
| 170506 | NA | NA | FEMALE | Local Zebu | 26 | Homa Bay | Ndhiwa | 514950 | Kenya |
n_animals <- nrow(pedigree)
n_sires <- n_distinct(pedigree$sire_id[!is.na(pedigree$sire_id)])
n_dams <- n_distinct(pedigree$dam_id[!is.na(pedigree$dam_id)])
n_founders <- sum(is.na(pedigree$sire_id) & is.na(pedigree$dam_id))
pct_known_sire <- mean(!is.na(pedigree$sire_id)) * 100
pct_known_dam <- mean(!is.na(pedigree$dam_id)) * 100
data.frame(
Statistic = c("Total Animals", "Unique Sires", "Unique Dams",
"Founders", "Known Sire (%)", "Known Dam (%)"),
Value = c(format(n_animals, big.mark = ","), format(n_sires, big.mark = ","),
format(n_dams, big.mark = ","), format(n_founders, big.mark = ","),
paste0(round(pct_known_sire, 1), "%"), paste0(round(pct_known_dam, 1), "%"))
) %>% styled_table(caption = "Pedigree completeness statistics")| Statistic | Value |
|---|---|
| Total Animals | 248,208 |
| Unique Sires | 8,415 |
| Unique Dams | 44,062 |
| Founders | 151,283 |
| Known Sire (%) | 32.1% |
| Known Dam (%) | 28.7% |
# Create summary data
region_summary <- pedigree %>%
filter(!is.na(region)) %>%
group_by(region) %>%
summarise(
Animals = n(),
Farms = n_distinct(farm_id),
Districts = n_distinct(district),
.groups = 'drop'
) %>%
arrange(desc(Animals)) %>%
mutate(
Percentage = round(Animals / sum(Animals) * 100, 1),
# Create label with region name and count
label_animals = paste0(region, ": ", format(Animals, big.mark = ","), " (", Percentage, "%)"),
label_farms = paste0(region, ": ", format(Farms, big.mark = ","))
)
# Display as a nice table first
region_summary %>%
select(Region = region, Animals, `%` = Percentage, Farms, Districts) %>%
mutate(Animals = format(Animals, big.mark = ","),
Farms = format(Farms, big.mark = ",")) %>%
styled_table(caption = "Geographic distribution summary")| Region | Animals | % | Farms | Districts |
|---|---|---|---|---|
| Nakuru | 56,756 | 27.9 | 1,680 | 12 |
| Makueni | 15,293 | 7.5 | 3,265 | 8 |
| Trans Nzoia | 14,302 | 7.0 | 321 | 5 |
| Uasin Gishu | 13,297 | 6.5 | 1,462 | 7 |
| Kiambu | 11,948 | 5.9 | 948 | 11 |
| Nairobi | 11,849 | 5.8 | 489 | 10 |
| Nyeri | 10,100 | 5.0 | 1,475 | 6 |
| Laikipia | 9,590 | 4.7 | 329 | 3 |
| Muranga | 6,749 | 3.3 | 1,708 | 8 |
| Nyandarua | 5,973 | 2.9 | 1,100 | 8 |
| Meru | 5,710 | 2.8 | 1,131 | 6 |
| Kisumu | 4,290 | 2.1 | 1,382 | 8 |
| Narok | 4,251 | 2.1 | 117 | 6 |
| Kakamega | 4,028 | 2.0 | 1,277 | 12 |
| Nandi County | 2,484 | 1.2 | 549 | 5 |
| Siaya | 2,442 | 1.2 | 1,068 | 7 |
| Kajiado | 2,129 | 1.0 | 138 | 5 |
| Kilifi | 1,738 | 0.9 | 8 | 3 |
| Bungoma | 1,736 | 0.9 | 512 | 9 |
| Embu | 1,603 | 0.8 | 209 | 3 |
| Taita Taveta | 1,587 | 0.8 | 181 | 6 |
| Homa Bay | 1,584 | 0.8 | 838 | 9 |
| Busia | 1,553 | 0.8 | 720 | 8 |
| Migori | 1,504 | 0.7 | 825 | 10 |
| Kisii | 1,240 | 0.6 | 273 | 10 |
| Baringo | 1,237 | 0.6 | 101 | 6 |
| Bomet | 1,002 | 0.5 | 193 | 6 |
| Tharaka Nithi | 981 | 0.5 | 200 | 3 |
| Vihiga | 969 | 0.5 | 431 | 6 |
| Kitui | 862 | 0.4 | 516 | 9 |
| Elgeyo Marakwet | 823 | 0.4 | 85 | 4 |
| Machakos | 800 | 0.4 | 108 | 6 |
| Kericho | 788 | 0.4 | 95 | 5 |
| Kirinyaga | 739 | 0.4 | 84 | 4 |
| Nyamira | 667 | 0.3 | 196 | 5 |
| Mombasa | 400 | 0.2 | 7 | 1 |
| West Pokot | 118 | 0.1 | 9 | 2 |
| Marsabit | 58 | 0.0 | 14 | 2 |
| Kwale | 45 | 0.0 | 2 | 2 |
| Oromia | 18 | 0.0 | 1 | 1 |
| Mandera | 8 | 0.0 | 1 | 1 |
| Tana River | 6 | 0.0 | 3 | 1 |
| Amhara | 5 | 0.0 | 3 | 2 |
| Kolero Ward | 3 | 0.0 | 1 | 1 |
| Osun | 1 | 0.0 | 1 | 1 |
# Side-by-side horizontal bar plots with values displayed inside bars
p1 <- ggplot(region_summary, aes(x = reorder(region, Animals), y = Animals)) +
geom_col(aes(fill = Animals), width = 0.7, show.legend = FALSE) +
geom_text(aes(label = paste0(format(Animals, big.mark = ","), " (", Percentage, "%)")),
hjust = 1.1, size = 4, fontface = "bold", color = "white") +
scale_y_continuous(expand = expansion(mult = c(0, 0.05)), labels = comma) +
scale_fill_gradient(low = "#3B82F6", high = "#1e3a8a") +
coord_flip() +
labs(
title = "Animals by Region",
x = NULL,
y = "Number of Animals"
) +
theme(
axis.text.y = element_text(size = 11, face = "bold"),
plot.title = element_text(size = 14)
)
p2 <- ggplot(region_summary, aes(x = reorder(region, Farms), y = Farms)) +
geom_col(aes(fill = Farms), width = 0.7, show.legend = FALSE) +
geom_text(aes(label = format(Farms, big.mark = ",")),
hjust = 1.1, size = 4, fontface = "bold", color = "white") +
scale_fill_gradient(low = "#10B981", high = "#065f46") +
scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
coord_flip() +
labs(
title = "Farms by Region",
x = NULL,
y = "Number of Farms"
) +
theme(
axis.text.y = element_text(size = 11, face = "bold"),
plot.title = element_text(size = 14)
)
p1 + p2 + plot_annotation(
title = "Geographic Coverage of Kenya Dairy Genetic Evaluation",
theme = theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5))
)🗺️ Interactive Visualization: Explore the geographic distribution of dairy cattle and farms across Kenya’s counties. Click on markers for detailed statistics.
# ==============================================================================
# KENYA COUNTY COORDINATES
# ==============================================================================
# Approximate centroids for Kenya's major dairy regions
# You may need to adjust these based on your actual region names in the data
kenya_coords <- data.frame(
region = c(
# Major dairy regions (Rift Valley & Central)
"Nakuru", "Nyandarua", "Kiambu", "Nyeri", "Murang'a", "Kirinyaga",
"Laikipia", "Narok", "Baringo", "Uasin Gishu", "Trans Nzoia", "Elgeyo Marakwet",
"Nandi", "Kericho", "Bomet", "West Pokot",
# Western Kenya
"Kakamega", "Bungoma", "Vihiga", "Busia",
# Nyanza
"Kisii", "Nyamira", "Kisumu", "Siaya", "Homa Bay", "Migori",
# Eastern
"Meru", "Embu", "Tharaka-Nithi", "Machakos", "Makueni", "Kitui",
# Coast
"Kilifi", "Kwale", "Mombasa", "Taita Taveta",
# Nairobi & surroundings
"Nairobi", "Kajiado"
),
lat = c(
-0.2833, -0.4000, -1.0000, -0.4167, -0.7833, -0.5000,
0.2000, -1.1000, 0.4667, 0.5167, 1.0000, 0.9500,
0.1833, -0.3667, -0.7833, 1.6167,
0.2833, 0.5667, 0.0500, 0.4500,
-0.6833, -0.5667, -0.1000, 0.0617, -0.5167, -1.0667,
0.0500, -0.5333, -0.3000, -1.5167, -1.8000, -1.3667,
-3.5000, -4.1833, -4.0500, -3.4000,
-1.2833, -1.8500
),
lng = c(
36.0667, 36.4333, 36.8333, 37.0000, 37.0000, 37.3333,
36.7833, 35.8667, 35.9667, 35.2833, 35.0000, 35.5000,
35.1833, 35.2833, 35.3333, 35.1000,
34.7500, 34.5667, 34.7333, 34.1333,
34.7667, 34.9333, 34.7500, 34.2500, 34.4500, 34.4667,
37.6500, 37.4500, 37.8500, 37.2667, 37.6167, 38.0000,
39.8500, 39.4500, 39.6667, 38.3500,
36.8167, 36.7833
),
stringsAsFactors = FALSE
)
# Merge with region summary data
map_data <- region_summary %>%
left_join(kenya_coords, by = "region") %>%
filter(!is.na(lat), !is.na(lng))
# Check for unmatched regions
unmatched <- region_summary %>%
anti_join(kenya_coords, by = "region")
if(nrow(unmatched) > 0) {
cat("Note: The following regions could not be mapped (coordinates not found):\n")
cat(paste(unmatched$region, collapse = ", "), "\n")
cat("You may need to add their coordinates to the kenya_coords dataframe.\n\n")
}Note: The following regions could not be mapped (coordinates not found):
Muranga, Nandi County, Tharaka Nithi, Marsabit, Oromia, Mandera, Tana River, Amhara, Kolero Ward, Osun
You may need to add their coordinates to the kenya_coords dataframe.
# ==============================================================================
# INTERACTIVE BUBBLE MAP - Animals by Region
# ==============================================================================
if(nrow(map_data) > 0) {
# Create color palette based on number of animals
pal <- colorNumeric(
palette = c("#3B82F6", "#1e40af", "#1e3a8a"),
domain = map_data$Animals
)
# Scale bubble size (sqrt transformation for better visual scaling)
map_data <- map_data %>%
mutate(
radius = sqrt(Animals) / max(sqrt(Animals)) * 40 + 5,
popup_content = paste0(
"<div style='font-family: sans-serif; padding: 10px;'>",
"<h4 style='color: #1a1f36; margin: 0 0 10px 0; border-bottom: 2px solid #D4AF37; padding-bottom: 5px;'>",
"<strong>", region, "</strong></h4>",
"<table style='width: 100%; font-size: 13px;'>",
"<tr><td style='padding: 3px 10px 3px 0;'>🐄 Animals:</td><td style='font-weight: bold; color: #3B82F6;'>",
format(Animals, big.mark = ","), "</td></tr>",
"<tr><td style='padding: 3px 10px 3px 0;'>🏠 Farms:</td><td style='font-weight: bold; color: #10B981;'>",
format(Farms, big.mark = ","), "</td></tr>",
"<tr><td style='padding: 3px 10px 3px 0;'>📍 Districts:</td><td style='font-weight: bold; color: #7C3AED;'>",
Districts, "</td></tr>",
"<tr><td style='padding: 3px 10px 3px 0;'>📊 Share:</td><td style='font-weight: bold; color: #F97316;'>",
Percentage, "%</td></tr>",
"</table></div>"
)
)
# Create the map
leaflet(map_data, width = "100%", height = 600) %>%
addProviderTiles("CartoDB.Positron", group = "Light") %>%
addProviderTiles("Esri.WorldImagery", group = "Satellite") %>%
addProviderTiles("OpenStreetMap", group = "OpenStreetMap") %>%
addProviderTiles("Esri.WorldTopoMap", group = "Topographic") %>%
# Add circle markers
addCircleMarkers(
lng = ~lng, lat = ~lat,
radius = ~radius,
color = "#1e3a8a",
weight = 2,
opacity = 0.9,
fillColor = ~pal(Animals),
fillOpacity = 0.7,
popup = ~popup_content,
label = ~paste0(region, ": ", format(Animals, big.mark = ","), " animals"),
labelOptions = labelOptions(
style = list("font-weight" = "bold", "font-size" = "12px"),
textsize = "12px",
direction = "auto"
),
group = "Animals"
) %>%
# Add legend
addLegend(
position = "bottomright",
pal = pal,
values = ~Animals,
title = "Number of<br>Animals",
opacity = 0.9,
labFormat = labelFormat(big.mark = ",")
) %>%
# Add layer controls
addLayersControl(
baseGroups = c("Light", "Satellite", "OpenStreetMap", "Topographic"),
options = layersControlOptions(collapsed = TRUE)
) %>%
# Add scale bar
addScaleBar(position = "bottomleft") %>%
# Set default view to Kenya
setView(lng = 37.0, lat = -0.5, zoom = 6) %>%
# Add title
addControl(
html = "<div style='background: white; padding: 10px 15px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); font-family: sans-serif;'>
<strong style='font-size: 16px; color: #1a1f36;'>🐄 Kenya Dairy Cattle Distribution</strong><br>
<span style='font-size: 12px; color: #6B7280;'>Click markers for detailed statistics</span></div>",
position = "topright"
)
} else {
cat("No regions could be mapped. Please check that region names in your data match the coordinate lookup table.\n")
}# ==============================================================================
# HEATMAP VISUALIZATION - Farm Density
# ==============================================================================
if(nrow(map_data) > 0) {
# Create expanded points for heatmap effect (simulate density)
# Weight by number of farms
heatmap_points <- map_data %>%
slice(rep(1:n(), times = ceiling(Farms/100))) %>%
mutate(
# Add small random jitter for visual effect
lat_jitter = lat + runif(n(), -0.15, 0.15),
lng_jitter = lng + runif(n(), -0.15, 0.15)
)
leaflet(width = "100%", height = 600) %>%
addProviderTiles("CartoDB.DarkMatter", group = "Dark") %>%
addProviderTiles("CartoDB.Positron", group = "Light") %>%
# Add heatmap layer
addHeatmap(
data = heatmap_points,
lng = ~lng_jitter, lat = ~lat_jitter,
intensity = ~Farms/max(Farms),
blur = 25,
max = 0.8,
radius = 30,
gradient = c(
"0" = "transparent",
"0.2" = "#3B82F6",
"0.4" = "#10B981",
"0.6" = "#F97316",
"0.8" = "#EF4444",
"1" = "#DC2626"
),
group = "Heatmap"
) %>%
# Add region labels
addLabelOnlyMarkers(
data = map_data,
lng = ~lng, lat = ~lat,
label = ~region,
labelOptions = labelOptions(
noHide = TRUE,
direction = "center",
textOnly = TRUE,
style = list(
"color" = "white",
"font-weight" = "bold",
"font-size" = "11px",
"text-shadow" = "1px 1px 2px black"
)
),
group = "Labels"
) %>%
addLayersControl(
baseGroups = c("Dark", "Light"),
overlayGroups = c("Heatmap", "Labels"),
options = layersControlOptions(collapsed = TRUE)
) %>%
addScaleBar(position = "bottomleft") %>%
setView(lng = 37.0, lat = -0.5, zoom = 6) %>%
addControl(
html = "<div style='background: rgba(26,31,54,0.9); padding: 10px 15px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.3); font-family: sans-serif;'>
<strong style='font-size: 16px; color: white;'>🔥 Farm Density Heatmap</strong><br>
<span style='font-size: 12px; color: #9CA3AF;'>Intensity reflects concentration of dairy farms</span></div>",
position = "topright"
)
}# ==============================================================================
# CLUSTERED MARKERS - Interactive Farm Exploration
# ==============================================================================
if(nrow(map_data) > 0) {
# Assign colors based on animal count
map_data <- map_data %>%
mutate(
marker_color = case_when(
Animals > 10000 ~ "#DC2626",
Animals > 5000 ~ "#F97316",
Animals > 1000 ~ "#0891B2",
TRUE ~ "#3B82F6"
)
)
leaflet(map_data, width = "100%", height = 600) %>%
addProviderTiles("CartoDB.Voyager", group = "Voyager") %>%
addProviderTiles("Esri.WorldStreetMap", group = "Streets") %>%
addProviderTiles("Esri.WorldTopoMap", group = "Terrain") %>%
addCircleMarkers(
lng = ~lng, lat = ~lat,
radius = 12,
color = ~marker_color,
weight = 2,
opacity = 1,
fillColor = ~marker_color,
fillOpacity = 0.7,
popup = ~popup_content,
label = ~paste0(region, " (", format(Animals, big.mark = ","), ")"),
clusterOptions = markerClusterOptions(
iconCreateFunction = JS("
function(cluster) {
var childCount = cluster.getChildCount();
var c = ' marker-cluster-';
if (childCount < 5) {
c += 'small';
} else if (childCount < 10) {
c += 'medium';
} else {
c += 'large';
}
return new L.DivIcon({
html: '<div><span>' + childCount + '</span></div>',
className: 'marker-cluster' + c,
iconSize: new L.Point(40, 40)
});
}
")
)
) %>%
addLayersControl(
baseGroups = c("Voyager", "Streets", "Terrain"),
options = layersControlOptions(collapsed = TRUE)
) %>%
addScaleBar(position = "bottomleft") %>%
setView(lng = 37.0, lat = -0.5, zoom = 6) %>%
addControl(
html = "<div style='background: white; padding: 10px 15px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); font-family: sans-serif;'>
<strong style='font-size: 16px; color: #1a1f36;'>📍 Regional Clusters</strong><br>
<span style='font-size: 12px; color: #6B7280;'>Click clusters to zoom, markers for details</span><br>
<div style='margin-top: 8px; font-size: 11px;'>
<span style='color: #DC2626;'>●</span> >10,000
<span style='color: #F97316;'>●</span> 5,000-10,000
<span style='color: #0891B2;'>●</span> 1,000-5,000
<span style='color: #3B82F6;'>●</span> <1,000
</div></div>",
position = "topright"
)
}# ==============================================================================
# PREPARE TRAIT DATA BY REGION FOR MAPPING
# (This section will be populated after phenotype data is processed)
# ==============================================================================
# Note: This chunk prepares the trait summary data for mapping
# It will be displayed after the phenotypic data preparation section below# Determine which breed column exists
breed_col <- if("breed" %in% names(pedigree)) "breed" else if("main_breed" %in% names(pedigree)) "main_breed" else NULL
if(!is.null(breed_col)) {
breed_summary <- pedigree %>%
filter(!is.na(.data[[breed_col]])) %>%
count(breed_val = .data[[breed_col]], name = "Count") %>%
arrange(desc(Count)) %>%
head(12) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1))
# Show table first
breed_summary %>%
rename(Breed = breed_val, Animals = Count, `%` = Percentage) %>%
mutate(Animals = format(Animals, big.mark = ",")) %>%
styled_table(caption = "Breed composition of study population")
}| Breed | Animals | % |
|---|---|---|
| Holstein Friesian | 109,158 | 51.2 |
| Ayrshire | 32,484 | 15.2 |
| Sahiwal | 20,880 | 9.8 |
| Boran | 18,976 | 8.9 |
| Jersey | 8,631 | 4.1 |
| Guernsey | 6,293 | 3.0 |
| Holstein-Friesian crosses | 3,726 | 1.7 |
| Unknown | 3,484 | 1.6 |
| Others | 3,341 | 1.6 |
| Simmental | 2,824 | 1.3 |
| Local Zebu | 2,169 | 1.0 |
| Sahiwal crosses | 1,143 | 0.5 |
if(!is.null(breed_col)) {
breed_summary <- breed_summary %>%
mutate(breed_val = factor(breed_val, levels = rev(breed_val)))
ggplot(breed_summary, aes(x = breed_val, y = Count)) +
geom_col(aes(fill = Count), width = 0.7, show.legend = FALSE) +
geom_text(aes(label = paste0(format(Count, big.mark = ","), " (", Percentage, "%)")),
hjust = 1.1, size = 3.8, fontface = "bold", color = "white") +
scale_fill_gradient(low = "#F97316", high = "#9a3412") +
scale_y_continuous(expand = expansion(mult = c(0, 0.05)), labels = comma) +
coord_flip() +
labs(
title = "Breed Composition of Study Population",
subtitle = paste("Total:", format(sum(breed_summary$Count), big.mark = ","), "animals"),
x = NULL,
y = "Number of Animals"
) +
theme(
axis.text.y = element_text(size = 10, face = "bold"),
plot.title = element_text(size = 14)
)
}# ==============================================================================
# PREPARE LOOKUP TABLE FROM PEDIGREE (region only - farm_id is in phenotype files)
# ==============================================================================
# Determine breed column in pedigree
breed_col_ped <- if("breed" %in% names(pedigree)) "breed" else "main_breed"
pedigree_lookup <- pedigree %>%
mutate(animal_id = as.character(animal_id)) %>%
select(animal_id, region, district, any_of(breed_col_ped)) %>%
rename(breed_ped = any_of(breed_col_ped)) %>%
group_by(animal_id) %>%
slice(1) %>%
ungroup()
cat("Pedigree lookup created with", nrow(pedigree_lookup), "animals\n\n")Pedigree lookup created with 248208 animals
# ==============================================================================
# AGE AT FIRST CALVING (AFC) DATA
# ==============================================================================
cat("Processing AFC data...\n")Processing AFC data...
afc_data <- lactation %>%
mutate(
animal_id = as.character(animal_id),
afc_months = as.numeric(afc_months),
parity = as.numeric(parity)
) %>%
filter(
parity == 1,
!is.na(afc_months),
afc_months >= 18,
afc_months <= 72
) %>%
left_join(pedigree_lookup, by = "animal_id") %>%
filter(!is.na(region)) %>%
mutate(
# Use main_breed_name from lactation if available, otherwise from pedigree
breed = coalesce(main_breed_name, breed_ped)
) %>%
filter(!is.na(breed), !is.na(farm_id)) %>%
group_by(animal_id) %>%
slice(1) %>%
ungroup() %>%
mutate(
region = factor(region),
farm_id = factor(farm_id),
breed = factor(breed)
)
cat("AFC records after filtering:", format(nrow(afc_data), big.mark = ","), "\n")AFC records after filtering: 27,503
# ==============================================================================
# DAILY MILK YIELD (TEST-DAY) DATA
# ==============================================================================
cat("Processing Daily Milk data...\n")Processing Daily Milk data...
daily_milk_data <- milking %>%
mutate(
animal_id = as.character(animal_id),
daily_yield = as.numeric(daily_yield),
dim = as.numeric(dim),
parity = as.numeric(parity)
) %>%
filter(
!is.na(daily_yield),
daily_yield > 0.5,
daily_yield < 60,
!is.na(dim),
dim >= 5,
dim <= 500
) %>%
left_join(pedigree_lookup, by = "animal_id") %>%
filter(!is.na(region)) %>%
mutate(
breed = coalesce(main_breed_name, breed_ped)
) %>%
filter(!is.na(breed), !is.na(farm_id)) %>%
mutate(
parity_class = factor(ifelse(is.na(parity) | parity < 1, 1, ifelse(parity > 4, 4, parity))),
dim_class = factor(ifelse(ceiling(dim / 30) > 17, 17, ceiling(dim / 30))),
region = factor(region),
farm_id = factor(farm_id),
breed = factor(breed)
)
cat("Daily Milk records after filtering:", format(nrow(daily_milk_data), big.mark = ","), "\n")Daily Milk records after filtering: 582,060
# ==============================================================================
# TOTAL LACTATION YIELD (TLY) DATA
# ==============================================================================
cat("Processing TLY data...\n")Processing TLY data...
tly_data <- lactation %>%
mutate(
animal_id = as.character(animal_id),
total_lactation_yield = as.numeric(total_lactation_yield),
parity = as.numeric(parity)
) %>%
filter(
!is.na(total_lactation_yield),
total_lactation_yield >= 100,
total_lactation_yield <= 20000
) %>%
left_join(pedigree_lookup, by = "animal_id") %>%
filter(!is.na(region)) %>%
mutate(
breed = coalesce(main_breed_name, breed_ped)
) %>%
filter(!is.na(breed), !is.na(farm_id)) %>%
mutate(
parity_class = factor(ifelse(is.na(parity) | parity < 1, 1, ifelse(parity > 4, 4, parity))),
region = factor(region),
farm_id = factor(farm_id),
breed = factor(breed)
)
cat("TLY records after filtering:", format(nrow(tly_data), big.mark = ","), "\n")TLY records after filtering: 9,558
# ==============================================================================
# CALVING INTERVAL (CI) DATA
# ==============================================================================
cat("Processing CI data...\n")Processing CI data...
ci_data <- lactation %>%
mutate(
animal_id = as.character(animal_id),
calving_interval_days = as.numeric(calving_interval_days),
parity = as.numeric(parity)
) %>%
filter(
parity > 1,
!is.na(calving_interval_days),
calving_interval_days >= 280,
calving_interval_days <= 900
) %>%
left_join(pedigree_lookup, by = "animal_id") %>%
filter(!is.na(region)) %>%
mutate(
breed = coalesce(main_breed_name, breed_ped)
) %>%
filter(!is.na(breed), !is.na(farm_id)) %>%
mutate(
parity_class = factor(ifelse(parity > 4, 4, parity)),
region = factor(region),
farm_id = factor(farm_id),
breed = factor(breed)
)
cat("CI records after filtering:", format(nrow(ci_data), big.mark = ","), "\n\n")CI records after filtering: 44,049
# ==============================================================================
# SUMMARY TABLE
# ==============================================================================
data.frame(
Trait = c("Age at First Calving", "Daily Milk Yield", "Total Lactation Yield", "Calving Interval"),
Abbrev = c("AFC", "DMY", "TLY", "CI"),
Records = c(nrow(afc_data), nrow(daily_milk_data), nrow(tly_data), nrow(ci_data)),
Animals = c(n_distinct(afc_data$animal_id), n_distinct(daily_milk_data$animal_id),
n_distinct(tly_data$animal_id), n_distinct(ci_data$animal_id)),
Farms = c(n_distinct(afc_data$farm_id), n_distinct(daily_milk_data$farm_id),
n_distinct(tly_data$farm_id), n_distinct(ci_data$farm_id)),
Filter = c("Parity=1, 18-72 mo", "0.5-60 kg, DIM 5-500", "100-20,000 kg", "Parity≥2, 280-900 d")
) %>%
mutate(Records = format(Records, big.mark = ","),
Animals = format(Animals, big.mark = ","),
Farms = format(Farms, big.mark = ",")) %>%
styled_table(caption = "Data available after quality control filtering")| Trait | Abbrev | Records | Animals | Farms | Filter |
|---|---|---|---|---|---|
| Age at First Calving | AFC | 27,503 | 27,503 | 1,239 | Parity=1, 18-72 mo |
| Daily Milk Yield | DMY | 582,060 | 15,255 | 647 | 0.5-60 kg, DIM 5-500 |
| Total Lactation Yield | TLY | 9,558 | 4,931 | 354 | 100-20,000 kg |
| Calving Interval | CI | 44,049 | 17,527 | 440 | Parity≥2, 280-900 d |
# ==============================================================================
# TRAIT PERFORMANCE MAP - Daily Milk Yield by Region
# ==============================================================================
# Calculate regional trait means
trait_by_region <- daily_milk_data %>%
group_by(region) %>%
summarise(
n_records = n(),
n_animals = n_distinct(animal_id),
mean_milk = round(mean(daily_yield, na.rm = TRUE), 2),
sd_milk = round(sd(daily_yield, na.rm = TRUE), 2),
.groups = 'drop'
) %>%
mutate(region = as.character(region))
# Merge with coordinates
trait_map_data <- trait_by_region %>%
left_join(kenya_coords, by = "region") %>%
filter(!is.na(lat), !is.na(lng))
if(nrow(trait_map_data) > 0) {
# Color palette for milk yield
milk_pal <- colorNumeric(
palette = c("#FEE2E2", "#FECACA", "#FCA5A5", "#F87171", "#EF4444", "#DC2626", "#B91C1C"),
domain = trait_map_data$mean_milk
)
# Create popup content
trait_map_data <- trait_map_data %>%
mutate(
popup_trait = paste0(
"<div style='font-family: sans-serif; padding: 10px; min-width: 200px;'>",
"<h4 style='color: #1a1f36; margin: 0 0 10px 0; border-bottom: 2px solid #3B82F6; padding-bottom: 5px;'>",
"<strong>", region, "</strong></h4>",
"<div style='background: #EFF6FF; padding: 10px; border-radius: 8px; margin-bottom: 10px;'>",
"<span style='font-size: 24px; font-weight: bold; color: #1e40af;'>", mean_milk, "</span>",
"<span style='font-size: 14px; color: #3B82F6;'> kg/day</span>",
"</div>",
"<table style='width: 100%; font-size: 12px;'>",
"<tr><td>± SD:</td><td style='font-weight: bold;'>", sd_milk, " kg</td></tr>",
"<tr><td>Records:</td><td style='font-weight: bold;'>", format(n_records, big.mark = ","), "</td></tr>",
"<tr><td>Animals:</td><td style='font-weight: bold;'>", format(n_animals, big.mark = ","), "</td></tr>",
"</table></div>"
)
)
leaflet(trait_map_data, width = "100%", height = 600) %>%
addProviderTiles("CartoDB.Positron", group = "Light") %>%
addProviderTiles("Esri.WorldGrayCanvas", group = "Gray") %>%
# Choropleth-style circles
addCircleMarkers(
lng = ~lng, lat = ~lat,
radius = ~sqrt(n_animals) / max(sqrt(n_animals)) * 35 + 8,
color = "#1e40af",
weight = 2,
opacity = 0.8,
fillColor = ~milk_pal(mean_milk),
fillOpacity = 0.8,
popup = ~popup_trait,
label = ~paste0(region, ": ", mean_milk, " kg/day")
) %>%
# Add labels with values
addLabelOnlyMarkers(
lng = ~lng, lat = ~lat,
label = ~paste0(round(mean_milk, 1)),
labelOptions = labelOptions(
noHide = TRUE,
direction = "center",
textOnly = TRUE,
style = list(
"color" = "#1e3a8a",
"font-weight" = "bold",
"font-size" = "11px"
)
)
) %>%
addLegend(
position = "bottomright",
pal = milk_pal,
values = ~mean_milk,
title = "Daily Milk<br>(kg/day)",
opacity = 0.9
) %>%
addLayersControl(
baseGroups = c("Light", "Gray"),
options = layersControlOptions(collapsed = TRUE)
) %>%
addScaleBar(position = "bottomleft") %>%
setView(lng = 37.0, lat = -0.5, zoom = 6) %>%
addControl(
html = "<div style='background: white; padding: 10px 15px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); font-family: sans-serif;'>
<strong style='font-size: 16px; color: #1a1f36;'>🥛 Daily Milk Yield by Region</strong><br>
<span style='font-size: 12px; color: #6B7280;'>Circle size = # animals; Color = yield</span></div>",
position = "topright"
)
}# ==============================================================================
# MULTI-TRAIT COMPARISON MAP
# ==============================================================================
# Calculate all trait means by region
all_traits_by_region <- bind_rows(
afc_data %>%
group_by(region) %>%
summarise(mean_val = mean(afc_months), n = n(), .groups = 'drop') %>%
mutate(trait = "AFC", unit = "months"),
daily_milk_data %>%
group_by(region) %>%
summarise(mean_val = mean(daily_yield), n = n(), .groups = 'drop') %>%
mutate(trait = "Daily Milk", unit = "kg/day"),
tly_data %>%
group_by(region) %>%
summarise(mean_val = mean(total_lactation_yield), n = n(), .groups = 'drop') %>%
mutate(trait = "Lactation Yield", unit = "kg"),
ci_data %>%
group_by(region) %>%
summarise(mean_val = mean(calving_interval_days), n = n(), .groups = 'drop') %>%
mutate(trait = "Calving Interval", unit = "days")
) %>%
mutate(region = as.character(region))
# Pivot to wide format for popup
traits_wide <- all_traits_by_region %>%
select(region, trait, mean_val) %>%
pivot_wider(names_from = trait, values_from = mean_val) %>%
left_join(kenya_coords, by = "region") %>%
filter(!is.na(lat), !is.na(lng))
if(nrow(traits_wide) > 0) {
# Create comprehensive popup
traits_wide <- traits_wide %>%
mutate(
popup_all = paste0(
"<div style='font-family: sans-serif; padding: 12px; min-width: 220px;'>",
"<h4 style='color: #1a1f36; margin: 0 0 12px 0; border-bottom: 3px solid #D4AF37; padding-bottom: 8px;'>",
region, "</h4>",
"<div style='display: grid; grid-template-columns: 1fr 1fr; gap: 8px;'>",
# AFC
"<div style='background: linear-gradient(135deg, #FFF7ED, #FFEDD5); padding: 10px; border-radius: 8px; border-left: 3px solid #F97316;'>",
"<div style='font-size: 11px; color: #9A3412;'>AFC</div>",
"<div style='font-size: 18px; font-weight: bold; color: #C2410C;'>",
ifelse(is.na(AFC), "N/A", paste0(round(AFC, 1), " mo")), "</div></div>",
# Daily Milk
"<div style='background: linear-gradient(135deg, #EFF6FF, #DBEAFE); padding: 10px; border-radius: 8px; border-left: 3px solid #3B82F6;'>",
"<div style='font-size: 11px; color: #1E40AF;'>Daily Milk</div>",
"<div style='font-size: 18px; font-weight: bold; color: #1D4ED8;'>",
ifelse(is.na(`Daily Milk`), "N/A", paste0(round(`Daily Milk`, 1), " kg")), "</div></div>",
# Lactation Yield
"<div style='background: linear-gradient(135deg, #ECFDF5, #D1FAE5); padding: 10px; border-radius: 8px; border-left: 3px solid #10B981;'>",
"<div style='font-size: 11px; color: #065F46;'>Lactation</div>",
"<div style='font-size: 18px; font-weight: bold; color: #047857;'>",
ifelse(is.na(`Lactation Yield`), "N/A", paste0(format(round(`Lactation Yield`, 0), big.mark = ","), " kg")), "</div></div>",
# Calving Interval
"<div style='background: linear-gradient(135deg, #F5F3FF, #EDE9FE); padding: 10px; border-radius: 8px; border-left: 3px solid #7C3AED;'>",
"<div style='font-size: 11px; color: #5B21B6;'>CI</div>",
"<div style='font-size: 18px; font-weight: bold; color: #6D28D9;'>",
ifelse(is.na(`Calving Interval`), "N/A", paste0(round(`Calving Interval`, 0), " d")), "</div></div>",
"</div></div>"
)
)
leaflet(traits_wide, width = "100%", height = 600) %>%
addProviderTiles("CartoDB.Voyager", group = "Voyager") %>%
addProviderTiles("Esri.WorldGrayCanvas", group = "Gray") %>%
addCircleMarkers(
lng = ~lng, lat = ~lat,
radius = 18,
color = "#D4AF37",
weight = 3,
opacity = 1,
fillColor = "#1a1f36",
fillOpacity = 0.85,
popup = ~popup_all,
label = ~region
) %>%
addLabelOnlyMarkers(
lng = ~lng, lat = ~lat,
label = ~region,
labelOptions = labelOptions(
noHide = TRUE,
direction = "bottom",
textOnly = TRUE,
offset = c(0, 15),
style = list(
"color" = "#1a1f36",
"font-weight" = "bold",
"font-size" = "10px"
)
)
) %>%
addLayersControl(
baseGroups = c("Voyager", "Gray"),
options = layersControlOptions(collapsed = TRUE)
) %>%
addScaleBar(position = "bottomleft") %>%
setView(lng = 37.0, lat = -0.5, zoom = 6) %>%
addControl(
html = "<div style='background: white; padding: 12px 15px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); font-family: sans-serif;'>
<strong style='font-size: 16px; color: #1a1f36;'>📊 Multi-Trait Regional Performance</strong><br>
<span style='font-size: 12px; color: #6B7280;'>Click markers to compare all traits</span></div>",
position = "topright"
)
}data.frame(
Trait = c("AFC (months)", "Daily Milk (kg)", "Lactation Milk (kg)", "Calving Interval (days)"),
N = c(nrow(afc_data), nrow(daily_milk_data), nrow(tly_data), nrow(ci_data)),
Mean = c(mean(afc_data$afc_months), mean(daily_milk_data$daily_yield),
mean(tly_data$total_lactation_yield), mean(ci_data$calving_interval_days)),
SD = c(sd(afc_data$afc_months), sd(daily_milk_data$daily_yield),
sd(tly_data$total_lactation_yield), sd(ci_data$calving_interval_days)),
Min = c(min(afc_data$afc_months), min(daily_milk_data$daily_yield),
min(tly_data$total_lactation_yield), min(ci_data$calving_interval_days)),
Median = c(median(afc_data$afc_months), median(daily_milk_data$daily_yield),
median(tly_data$total_lactation_yield), median(ci_data$calving_interval_days)),
Max = c(max(afc_data$afc_months), max(daily_milk_data$daily_yield),
max(tly_data$total_lactation_yield), max(ci_data$calving_interval_days)),
CV = c(sd(afc_data$afc_months)/mean(afc_data$afc_months)*100,
sd(daily_milk_data$daily_yield)/mean(daily_milk_data$daily_yield)*100,
sd(tly_data$total_lactation_yield)/mean(tly_data$total_lactation_yield)*100,
sd(ci_data$calving_interval_days)/mean(ci_data$calving_interval_days)*100)
) %>%
mutate(N = format(N, big.mark = ","),
across(c(Mean, SD, CV, Min, Median, Max), ~round(., 2))) %>%
styled_table(caption = "Descriptive statistics for all traits")| Trait | N | Mean | SD | Min | Median | Max | CV |
|---|---|---|---|---|---|---|---|
| AFC (months) | 27,503 | 39.34 | 11.90 | 18.00 | 36.0 | 72.0 | 30.24 |
| Daily Milk (kg) | 582,060 | 9.24 | 7.07 | 0.51 | 9.2 | 50.0 | 76.54 |
| Lactation Milk (kg) | 9,558 | 979.05 | 1706.33 | 100.00 | 364.3 | 19583.5 | 174.28 |
| Calving Interval (days) | 44,049 | 459.45 | 130.92 | 280.00 | 416.0 | 900.0 | 28.49 |
p_afc <- ggplot(afc_data, aes(x = afc_months)) +
geom_histogram(aes(y = after_stat(density)), bins = 35,
fill = trait_colors["AFC"], color = "white", alpha = 0.85) +
geom_density(linewidth = 1.2, color = "#9a3412") +
geom_vline(xintercept = mean(afc_data$afc_months), linetype = "dashed",
color = "#1a1f36", linewidth = 1) +
annotate("text", x = mean(afc_data$afc_months) + 3, y = Inf, vjust = 2, hjust = 0,
label = paste0("Mean: ", round(mean(afc_data$afc_months), 1), " mo"),
fontface = "bold", size = 3.5) +
labs(title = "Age at First Calving", x = "AFC (months)", y = "Density")
p_dm <- ggplot(daily_milk_data, aes(x = daily_yield)) +
geom_histogram(aes(y = after_stat(density)), bins = 45,
fill = trait_colors["Daily Milk"], color = "white", alpha = 0.85) +
geom_density(linewidth = 1.2, color = "#1e3a8a") +
geom_vline(xintercept = mean(daily_milk_data$daily_yield), linetype = "dashed",
color = "#1a1f36", linewidth = 1) +
annotate("text", x = mean(daily_milk_data$daily_yield) + 2, y = Inf, vjust = 2, hjust = 0,
label = paste0("Mean: ", round(mean(daily_milk_data$daily_yield), 1), " kg"),
fontface = "bold", size = 3.5) +
labs(title = "Daily Milk Yield", x = "Daily Yield (kg)", y = "Density")
p_tly <- ggplot(tly_data, aes(x = total_lactation_yield)) +
geom_histogram(aes(y = after_stat(density)), bins = 45,
fill = trait_colors["Lactation Milk"], color = "white", alpha = 0.85) +
geom_density(linewidth = 1.2, color = "#065f46") +
geom_vline(xintercept = mean(tly_data$total_lactation_yield), linetype = "dashed",
color = "#1a1f36", linewidth = 1) +
annotate("text", x = mean(tly_data$total_lactation_yield) + 500, y = Inf, vjust = 2, hjust = 0,
label = paste0("Mean: ", format(round(mean(tly_data$total_lactation_yield), 0), big.mark = ","), " kg"),
fontface = "bold", size = 3.5) +
scale_x_continuous(labels = comma) +
labs(title = "Total Lactation Yield", x = "Lactation Yield (kg)", y = "Density")
p_ci <- ggplot(ci_data, aes(x = calving_interval_days)) +
geom_histogram(aes(y = after_stat(density)), bins = 40,
fill = trait_colors["Calving Interval"], color = "white", alpha = 0.85) +
geom_density(linewidth = 1.2, color = "#4c1d95") +
geom_vline(xintercept = mean(ci_data$calving_interval_days), linetype = "dashed",
color = "#1a1f36", linewidth = 1) +
annotate("text", x = mean(ci_data$calving_interval_days) + 30, y = Inf, vjust = 2, hjust = 0,
label = paste0("Mean: ", round(mean(ci_data$calving_interval_days), 0), " d"),
fontface = "bold", size = 3.5) +
labs(title = "Calving Interval", x = "Calving Interval (days)", y = "Density")
(p_afc + p_dm) / (p_tly + p_ci) +
plot_annotation(
title = "Distribution of Dairy Cattle Traits in Kenya",
subtitle = "Dashed lines indicate population means",
theme = theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5, color = "#6B7280"))
)# Boxplots with improved spacing
p1_box <- ggplot(afc_data, aes(x = reorder(region, afc_months, FUN = median), y = afc_months)) +
geom_boxplot(fill = trait_colors["AFC"], alpha = 0.7, outlier.size = 0.5) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 4, color = "darkred") +
stat_summary(fun = mean, geom = "text", aes(label = round(after_stat(y), 1)),
vjust = -1, size = 3.5, fontface = "bold") +
labs(title = "Age at First Calving by Region", x = NULL, y = "AFC (months)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
plot.margin = margin(10, 10, 10, 20))
p2_box <- ggplot(daily_milk_data, aes(x = reorder(region, daily_yield, FUN = median), y = daily_yield)) +
geom_boxplot(fill = trait_colors["Daily Milk"], alpha = 0.7, outlier.size = 0.5) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 4, color = "darkblue") +
stat_summary(fun = mean, geom = "text", aes(label = round(after_stat(y), 1)),
vjust = -1, size = 3.5, fontface = "bold") +
labs(title = "Daily Milk Yield by Region", x = NULL, y = "Daily Milk (kg)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
plot.margin = margin(10, 10, 10, 20))
p3_box <- ggplot(tly_data, aes(x = reorder(region, total_lactation_yield, FUN = median), y = total_lactation_yield)) +
geom_boxplot(fill = trait_colors["Lactation Milk"], alpha = 0.7, outlier.size = 0.5) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 4, color = "darkgreen") +
stat_summary(fun = mean, geom = "text", aes(label = format(round(after_stat(y), 0), big.mark = ",")),
vjust = -1, size = 3.5, fontface = "bold") +
labs(title = "Total Lactation Yield by Region", x = NULL, y = "Lactation Milk (kg)") +
scale_y_continuous(labels = comma) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
plot.margin = margin(10, 10, 10, 20))
p4_box <- ggplot(ci_data, aes(x = reorder(region, calving_interval_days, FUN = median), y = calving_interval_days)) +
geom_boxplot(fill = trait_colors["Calving Interval"], alpha = 0.7, outlier.size = 0.5) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 4, color = "purple4") +
stat_summary(fun = mean, geom = "text", aes(label = round(after_stat(y), 0)),
vjust = -1, size = 3.5, fontface = "bold") +
labs(title = "Calving Interval by Region", x = NULL, y = "CI (days)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
plot.margin = margin(10, 10, 10, 20))
(p1_box + p2_box) / (p3_box + p4_box) +
plot_annotation(
title = "Trait Distribution by Region",
subtitle = "Diamond markers show mean values; numbers above show regional means",
theme = theme(plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
plot.subtitle = element_text(size = 11, hjust = 0.5, color = "#6B7280"))
)dim_summary <- daily_milk_data %>%
mutate(dim_class_num = as.numeric(as.character(dim_class))) %>%
group_by(dim_class_num) %>%
summarise(N = n(), Mean = mean(daily_yield), SD = sd(daily_yield),
SE = SD / sqrt(N), Lower = Mean - 1.96 * SE, Upper = Mean + 1.96 * SE,
.groups = 'drop') %>%
mutate(DIM_midpoint = (dim_class_num - 0.5) * 30)
peak_row <- dim_summary[which.max(dim_summary$Mean), ]
ggplot(dim_summary, aes(x = DIM_midpoint, y = Mean)) +
geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = trait_colors["Daily Milk"], alpha = 0.25) +
geom_line(linewidth = 1.5, color = trait_colors["Daily Milk"]) +
geom_point(size = 3.5, color = trait_colors["Daily Milk"], fill = "white", shape = 21, stroke = 1.5) +
geom_vline(xintercept = peak_row$DIM_midpoint, linetype = "dashed", color = "#dc2626", linewidth = 0.8) +
geom_point(data = peak_row, aes(x = DIM_midpoint, y = Mean), size = 6, color = "#dc2626", shape = 18) +
annotate("text", x = peak_row$DIM_midpoint + 35, y = peak_row$Mean + 0.5,
label = paste0("Peak: ", round(peak_row$Mean, 1), " kg\n@ DIM ", round(peak_row$DIM_midpoint, 0)),
color = "#dc2626", fontface = "bold", size = 4) +
scale_x_continuous(breaks = seq(0, 500, 50), limits = c(0, 520)) +
labs(title = "Average Lactation Curve of Kenya Dairy Cattle",
subtitle = "Mean daily milk yield (±95% CI) across days in milk (DIM)",
x = "Days in Milk (DIM)", y = "Daily Milk Yield (kg)",
caption = paste("Based on", format(nrow(daily_milk_data), big.mark = ","), "test-day records"))afc_model <- lm(afc_months ~ region + breed, data = afc_data)
dm_model <- lm(daily_yield ~ region + breed + parity_class + dim_class, data = daily_milk_data)
tly_model <- lm(total_lactation_yield ~ region + breed + parity_class, data = tly_data)
ci_model <- lm(calving_interval_days ~ region + breed + parity_class, data = ci_data)
data.frame(
Trait = c("AFC", "Daily Milk", "Lactation Yield", "Calving Interval"),
Model = c("Region + Breed", "Region + Breed + Parity + DIM",
"Region + Breed + Parity", "Region + Breed + Parity"),
N = c(nrow(afc_data), nrow(daily_milk_data), nrow(tly_data), nrow(ci_data)),
R2 = c(summary(afc_model)$r.squared, summary(dm_model)$r.squared,
summary(tly_model)$r.squared, summary(ci_model)$r.squared) * 100,
Adj_R2 = c(summary(afc_model)$adj.r.squared, summary(dm_model)$adj.r.squared,
summary(tly_model)$adj.r.squared, summary(ci_model)$adj.r.squared) * 100
) %>%
mutate(N = format(N, big.mark = ","),
R2 = paste0(round(R2, 2), "%"),
Adj_R2 = paste0(round(Adj_R2, 2), "%")) %>%
rename(`R² (%)` = R2, `Adj. R² (%)` = Adj_R2) %>%
styled_table(caption = "Variance explained by fixed effects models")| Trait | Model | N | R² (%) | Adj. R² (%) |
|---|---|---|---|---|
| AFC | Region + Breed | 27,503 | 22.07% | 21.9% |
| Daily Milk | Region + Breed + Parity + DIM | 582,060 | 53.8% | 53.8% |
| Lactation Yield | Region + Breed + Parity | 9,558 | 36.27% | 35.98% |
| Calving Interval | Region + Breed + Parity | 44,049 | 6.3% | 6.18% |
r2_data <- data.frame(
Trait = c("AFC", "Daily Milk", "Lactation Yield", "Calving Interval"),
R_squared = c(summary(afc_model)$r.squared, summary(dm_model)$r.squared,
summary(tly_model)$r.squared, summary(ci_model)$r.squared) * 100
)
ggplot(r2_data, aes(x = reorder(Trait, R_squared), y = R_squared, fill = Trait)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = paste0(round(R_squared, 1), "%")), hjust = -0.2, size = 5, fontface = "bold") +
scale_fill_manual(values = trait_colors) +
scale_y_continuous(expand = expansion(mult = c(0, 0.2)), limits = c(0, 100)) +
coord_flip() +
labs(title = "Variance Explained by Fixed Effects",
subtitle = "DIM class has major impact on daily milk yield",
x = NULL, y = "R-squared (%)")R version 4.5.1 (2025-06-13 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=English_Kenya.utf8 LC_CTYPE=English_Kenya.utf8
[3] LC_MONETARY=English_Kenya.utf8 LC_NUMERIC=C
[5] LC_TIME=English_Kenya.utf8
time zone: Africa/Nairobi
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] RColorBrewer_1.1-3 htmltools_0.5.8.1 sf_1.0-24
[4] leaflet.extras_2.0.1 leaflet_2.2.3 patchwork_1.3.2
[7] broom_1.0.9 scales_1.4.0 kableExtra_1.4.0
[10] knitr_1.50 ggplot2_3.5.2 tidyr_1.3.1
[13] dplyr_1.1.4 data.table_1.17.8
loaded via a namespace (and not attached):
[1] sass_0.4.10 generics_0.1.4 class_7.3-23
[4] xml2_1.4.0 KernSmooth_2.23-26 stringi_1.8.7
[7] digest_0.6.37 magrittr_2.0.3 evaluate_1.0.5
[10] grid_4.5.1 fastmap_1.2.0 jsonlite_2.0.0
[13] e1071_1.7-17 backports_1.5.0 DBI_1.2.3
[16] purrr_1.1.0 crosstalk_1.2.2 viridisLite_0.4.2
[19] textshaping_1.0.2 jquerylib_0.1.4 cli_3.6.5
[22] rlang_1.1.6 units_1.0-0 withr_3.0.2
[25] cachem_1.1.0 yaml_2.3.10 tools_4.5.1
[28] mime_0.13 vctrs_0.6.5 R6_2.6.1
[31] proxy_0.4-29 lifecycle_1.0.4 classInt_0.4-11
[34] stringr_1.5.1 leaflet.providers_2.0.0 htmlwidgets_1.6.4
[37] pkgconfig_2.0.3 pillar_1.11.0 bslib_0.9.0
[40] gtable_0.3.6 Rcpp_1.1.0 glue_1.8.0
[43] systemfonts_1.2.3 xfun_0.52 tibble_3.3.0
[46] tidyselect_1.2.1 rstudioapi_0.17.1 farver_2.1.2
[49] labeling_0.4.3 rmarkdown_2.29 svglite_2.2.1
[52] compiler_4.5.1