Berkshire 2023 progeny testing

Data formatting, checking

Author

Juan Steibel

Published

February 22, 2024

Start reading historical data and current placement data

setwd("C:\\Users\\marti\\OneDrive\\Documents\\Berkshire\\Prog")
library(matrixStats)
library(tidyverse)
library(readr)
library(readxl)
library(DT)
getwd()
[1] "C:/Users/marti/OneDrive/Documents/Berkshire/Prog"
dt<-read_xlsx(path = "2022 ABA Progeny Test SAS File.xlsx")
#count(dt,Year,Sire)%>%count(Year)
nd<-read_xlsx(path = "LK update Placement  Weight information_v3Nov2023 (5).xlsx")
#count(nd,`Complete Sire Name`)
nd<-rename(nd,ID=`LAST 4 OF RFID`)
#datatable(nd)

Sire without data

In the original placement file there were records without a sire

filter(nd, `Complete Sire Name`=="Not provided")
# A tibble: 0 × 21
# ℹ 21 variables: Sequence <dbl>, Date <dttm>, Breeder <chr>, ID <chr>,
#   Color of Visual Tag <chr>, Pig visual ID <chr>, Pig Weight, kg <dbl>,
#   Weight, lbs. <dbl>, Pen <dbl>, Ear Notch <chr>, Entry Name <chr>,
#   Producer RFID/State ID/Accession ID <chr>, State of Origin <chr>,
#   Birth Date <chr>, ABA reg. # <dbl>, DOB <dttm>, Sex <chr>,
#   Complete Sire Name <chr>, Sire Registration Number <dbl>, Dam reg. # <lgl>,
#   RFID - Applied at LEC <chr>

No data with missing sire.

Read quality data and print samples without IMF and other MQ

Meat quality data includes many spreadsheets that are imported and compiled

cc<-read_xlsx("2023 Berkshire Purdue data 11-4-23.xlsx")
nd<-nd%>%mutate(ID=as.numeric(`ID`))
jt<-full_join(nd,cc,by = c("ID"="ID"))
#write_excel_csv(jt,"joined_IMF.csv")
#filter(jt,is.na(`IMF (%)`))%>%datatable()

st<-readxl::excel_sheets("2023 Berkshire Purdue data 11-4-23.xlsx") 
tibble_l <- lapply(st, function(x) readxl::read_excel("2023 Berkshire Purdue data 11-4-23.xlsx", sheet = x)) 
#tibble_l%>%class()

mq<-tibble_l%>%reduce(inner_join,by="ID")

Import Weight data

Weight and performance data are part of the placement file, but in a different spreadsheet. THey are read, filtered for rows with missing data and prepared for fusion with other data.

wt<-read_xlsx(path = "LK update Placement  Weight information_v3Nov2023 (5).xlsx",sheet = "individual Growth",skip = 2,col_names = c("Pen","ID",    "Sex",  "w1", "w2", "w3","w4","w5","w6","ship_date", "ship_wt", "empty","adg1", "adg2", "adg3", "adg4", "adg5", "adgwt"))

wt<-wt%>%rowwise()%>%mutate(nmiss=sum(is.na(c_across(w1:w6))))%>%filter(nmiss==0)
#wt

placement<-select(nd,Pen,ID,Breeder,`ABA reg. #`,Sex,`Complete Sire Name`, `Sire Registration Number`,`Complete Sire Name`,`Sire Registration Number`)

Join placement, weight and meat quality

THis is by far the longest data wranging operation including three tables: Placement, weight and MQ. Several steps take place here:

  1. select variables in each table

  2. rename variables as needed

  3. compute ADG for the whole test period (april-august weights)

  4. Transform Share force to starprobe units using equation from ISU paper: Instron_kg=WBSF_KG*0.8143+3.4416

  5. Compute LEA from provided data [I can’t actually guarantee this is correct, PU should confirm it]. How LEA was computed: Take 2 columns from LEA spreadsheet (Dots and Initial), LEA=Dots/20+Initial.

Finally, raw data is saved.

raw_data<-list(placement,wt,mq)%>%reduce(inner_join,by="ID")%>%
  select(Breeder,ID,Pen.x,Sex.x,`ABA reg. #`,
         Sire=`Complete Sire Name`,
         w1,w2,w3,w4,w5,w6,
         ship_date,ship_wt,
         adg1:adgwt
         ,`IMF (%)`,
         Color:Firmness,
         pH,Backfat,Initial,Dots,
         `Purge loss (%)`,
         `Carcass Length`,
         HCW,`WBSF (kg)`:`h(D65)`,)%>%
rename(Pen=Pen.x,Gender=Sex.x,ABA_regnum=`ABA reg. #`,
       weight_04_07=w1,weight_04_25=w2,weight_05_16=w3,weight_06_13=w4,weight_07_11=w5,weight_08_08=w6,
       adg_april=adg1,adg_may=adg2,adg_june=adg3,adg_july=adg4,adg_august=adg5,
       Carcass_BF10=Backfat,
       `48h_Loin_pH`=pH,
       IMF_perc=`IMF (%)`,
       Visual_Color=Color,
       Visual_Marbling=Marbling,
       Visual_Firmness=Firmness,
      Thaw_Purge=`Purge loss (%)`,
         carcass_length=`Carcass Length`,
       WBSF_KG=`WBSF (kg)`,
        `Minolta_L*`=`L*(D65)`,a_star_CIE=`a*(D65)`,b_star_CIE=`b*(D65)`,
        Hunter_L=`L(Hunter)(D65)`,a_Hunter=`a(Hunter)(D65)`,b_hunter=`b(Hunter)(D65)`,
        C_star=`C*(D65)`,h_angle=`h(D65)`)%>%
  mutate(Instron_kg=WBSF_KG*0.8143+3.4416,
         on_test_weight= weight_04_07,
         on_test_date="04-07-2023",
         off_test_weight=weight_08_08,
         off_test_date="08-08-2023",
         on_test_days=as.numeric(mdy(off_test_date) - mdy(on_test_date)),
         on_test_adg=(off_test_weight-on_test_weight)/on_test_days,
         Carcass_LEA=Dots/20+Initial,Year=2023)%>%
  select(Year, Pen, Breeder,Sire,Gender,
         on_test_date,on_test_weight,off_test_date,off_test_weight,on_test_days,on_test_adg,
         Carcass_BF10,Carcass_LEA, `48h_Loin_pH`, Visual_Color,Visual_Marbling,Visual_Firmness, `Minolta_L*`,Hunter_L,Thaw_Purge,Instron_kg,everything())%>%
  rename(`On-Test_Days`=on_test_days,`On-Test_ADG`=on_test_adg,`On_Test_WT`=on_test_weight)

#write_excel_csv(raw_data,"raw_data_f.csv")
#str(raw_data)

xlsx::write.xlsx(raw_data,"raw_data_f.xlsx")

compare this year’s results to data up to 2022

This is important to make sure we are working in the same range for all phenotypes and that we can join data for computing correction factors.

The following phenotypes are compared between the two datasets:

  • On_Test_WT

  • On-Test_ADG

  • On-Test_Days

  • Carcass_BF10

  • Carcass_LEA

  • 48h_Loin_pH

  • Visual_Color

  • Visual_Marbling

  • Minolta_L*

  • Thaw_Purge

  • Instron_kg

The comparison is on the basis of the mean value per year and sex of the pigs in the progeny test.

variables<-c("Year","Pen","Breeder","Sire","Gender","On_Test_WT","On-Test_ADG","On-Test_Days",
             "Carcass_BF10","Carcass_LEA","48h_Loin_pH","Visual_Color","Visual_Marbling",
             "Minolta_L*","Thaw_Purge","Instron_kg")

dts<-cbind(dt[,variables][,1:5],apply(dt[variables][,-1:-5],2,as.numeric))

  
dts<-mutate(dts,`On_Test_WT`= On_Test_WT*0.454,`On-Test_ADG`=`On-Test_ADG`*0.454)
mean(dts$`On-Test_ADG`,na.rm = T)
[1] 0.7506046
joint_d<-rbind(dts,
raw_data[,variables])%>%mutate(yr=c("2023","previous")[(Year!=2023)+1])%>%group_by(yr,Gender)

#joint_d
sm<-group_map(joint_d[,-c(1,2,3,4)], ~summary(.x))

compare means

mn<-group_map(joint_d[,-c(1,2,3,4)], ~colMeans(.x,na.rm = T))

nm<-group_data(joint_d)[,1:2]%>%apply(.,1,paste,collapse="_")%>%unlist()
nm
[1] "2023_B"     "2023_G"     "previous_B" "previous_G"
mn<-data.frame(mn)
colnames(mn)<-nm
mn
                     2023_B      2023_G  previous_B  previous_G
On_Test_WT       25.4687500  27.0416667  42.3341594  39.2317991
On-Test_ADG       0.6802846   0.6909214   0.7656382   0.7118534
On-Test_Days    123.0000000 123.0000000 117.9349398 120.6832298
Carcass_BF10      1.2515625   0.8750000   1.1938365   0.8096825
Carcass_LEA       6.5312500   7.3666667   6.8037990   7.4569444
48h_Loin_pH       5.5895313   5.5541667   5.7753799   5.7633025
Visual_Color      3.0312500   3.0416667   3.3958333   3.2839506
Visual_Marbling   2.1093750   1.6666667   2.2049020   1.8086420
Minolta_L*       47.2975000  45.9058333  46.9297672  46.0660802
Thaw_Purge        6.1364284   7.0105778   4.9594454   5.1079526
Instron_kg        5.8166564   5.8701759   4.5801913   4.7898827

Observations:

  1. On_Test_weights look different, these animals start much lighter

  2. ADG smaller

  3. Purge larger

  4. Instron starprobe larger

I include a full comparison below in case it is useful for a discussion.

full comparison of several statistics

names(sm)<-nm
sm
$`2023_B`
   On_Test_WT     On-Test_ADG      On-Test_Days  Carcass_BF10    Carcass_LEA   
 Min.   : 9.00   Min.   :0.3618   Min.   :123   Min.   :0.700   Min.   :4.800  
 1st Qu.:20.50   1st Qu.:0.5500   1st Qu.:123   1st Qu.:1.137   1st Qu.:6.075  
 Median :22.75   Median :0.6659   Median :123   Median :1.200   Median :6.550  
 Mean   :25.47   Mean   :0.6803   Mean   :123   Mean   :1.252   Mean   :6.531  
 3rd Qu.:31.00   3rd Qu.:0.7734   3rd Qu.:123   3rd Qu.:1.400   3rd Qu.:6.912  
 Max.   :55.50   Max.   :1.1976   Max.   :123   Max.   :1.650   Max.   :8.650  
  48h_Loin_pH     Visual_Color   Visual_Marbling   Minolta_L*   
 Min.   :5.440   Min.   :2.000   Min.   :1.500   Min.   :35.58  
 1st Qu.:5.525   1st Qu.:2.500   1st Qu.:1.500   1st Qu.:45.39  
 Median :5.562   Median :3.000   Median :2.000   Median :47.08  
 Mean   :5.590   Mean   :3.031   Mean   :2.109   Mean   :47.30  
 3rd Qu.:5.624   3rd Qu.:3.500   3rd Qu.:2.500   3rd Qu.:50.42  
 Max.   :6.225   Max.   :4.500   Max.   :4.000   Max.   :52.45  
   Thaw_Purge        Instron_kg   
 Min.   : 0.7016   Min.   :5.127  
 1st Qu.: 4.7702   1st Qu.:5.590  
 Median : 6.1222   Median :5.908  
 Mean   : 6.1364   Mean   :5.817  
 3rd Qu.: 7.0599   3rd Qu.:6.055  
 Max.   :10.8645   Max.   :6.386  

$`2023_G`
   On_Test_WT     On-Test_ADG      On-Test_Days  Carcass_BF10   
 Min.   :13.00   Min.   :0.3626   Min.   :123   Min.   :0.6000  
 1st Qu.:24.50   1st Qu.:0.4713   1st Qu.:123   1st Qu.:0.7750  
 Median :28.50   Median :0.6098   Median :123   Median :0.8750  
 Mean   :27.04   Mean   :0.6909   Mean   :123   Mean   :0.8750  
 3rd Qu.:32.38   3rd Qu.:0.8480   3rd Qu.:123   3rd Qu.:0.9625  
 Max.   :36.50   Max.   :1.3081   Max.   :123   Max.   :1.3000  
  Carcass_LEA     48h_Loin_pH     Visual_Color   Visual_Marbling
 Min.   :5.550   Min.   :5.440   Min.   :2.500   Min.   :1.000  
 1st Qu.:6.800   1st Qu.:5.522   1st Qu.:2.875   1st Qu.:1.500  
 Median :7.400   Median :5.545   Median :3.000   Median :1.500  
 Mean   :7.367   Mean   :5.554   Mean   :3.042   Mean   :1.667  
 3rd Qu.:7.950   3rd Qu.:5.586   3rd Qu.:3.500   3rd Qu.:1.500  
 Max.   :8.850   Max.   :5.745   Max.   :3.500   Max.   :3.000  
   Minolta_L*      Thaw_Purge      Instron_kg   
 Min.   :43.23   Min.   :4.731   Min.   :5.623  
 1st Qu.:44.34   1st Qu.:6.315   1st Qu.:5.737  
 Median :45.25   Median :6.751   Median :5.832  
 Mean   :45.91   Mean   :7.011   Mean   :5.870  
 3rd Qu.:46.31   3rd Qu.:7.687   3rd Qu.:5.978  
 Max.   :51.91   Max.   :9.139   Max.   :6.336  

$previous_B
   On_Test_WT     On-Test_ADG      On-Test_Days    Carcass_BF10  
 Min.   :20.88   Min.   :0.3454   Min.   : 91.0   Min.   :0.700  
 1st Qu.:36.65   1st Qu.:0.6909   1st Qu.:105.0   1st Qu.:1.000  
 Median :42.22   Median :0.7567   Median :115.0   Median :1.150  
 Mean   :42.33   Mean   :0.7656   Mean   :117.9   Mean   :1.194  
 3rd Qu.:47.90   3rd Qu.:0.8398   3rd Qu.:127.0   3rd Qu.:1.300  
 Max.   :65.19   Max.   :1.1324   Max.   :155.0   Max.   :2.100  
 NA's   :26      NA's   :34       NA's   :34      NA's   :290    
  Carcass_LEA      48h_Loin_pH     Visual_Color   Visual_Marbling
 Min.   : 4.050   Min.   :5.430   Min.   :2.000   Min.   :1.000  
 1st Qu.: 5.969   1st Qu.:5.670   1st Qu.:3.000   1st Qu.:1.500  
 Median : 6.700   Median :5.755   Median :3.500   Median :2.000  
 Mean   : 6.804   Mean   :5.775   Mean   :3.396   Mean   :2.205  
 3rd Qu.: 7.556   3rd Qu.:5.860   3rd Qu.:4.000   3rd Qu.:2.500  
 Max.   :10.675   Max.   :6.390   Max.   :6.000   Max.   :4.500  
 NA's   :41       NA's   :41      NA's   :41      NA's   :41     
   Minolta_L*      Thaw_Purge        Instron_kg   
 Min.   :36.30   Min.   : 0.4562   Min.   :2.986  
 1st Qu.:44.26   1st Qu.: 3.1582   1st Qu.:4.074  
 Median :46.58   Median : 4.6531   Median :4.539  
 Mean   :46.93   Mean   : 4.9595   Mean   :4.580  
 3rd Qu.:49.59   3rd Qu.: 6.4010   3rd Qu.:5.033  
 Max.   :58.45   Max.   :14.7000   Max.   :6.757  
 NA's   :41      NA's   :43        NA's   :41     

$previous_G
   On_Test_WT     On-Test_ADG      On-Test_Days    Carcass_BF10   
 Min.   :14.51   Min.   :0.4223   Min.   : 91.0   Min.   :0.5000  
 1st Qu.:33.60   1st Qu.:0.6403   1st Qu.:113.0   1st Qu.:0.7000  
 Median :37.73   Median :0.7037   Median :120.0   Median :0.8000  
 Mean   :39.23   Mean   :0.7119   Mean   :120.7   Mean   :0.8097  
 3rd Qu.:44.72   3rd Qu.:0.7859   3rd Qu.:128.0   3rd Qu.:0.9000  
 Max.   :67.19   Max.   :0.9947   Max.   :155.0   Max.   :1.6000  
 NA's   :5       NA's   :7        NA's   :7       NA's   :105     
  Carcass_LEA      48h_Loin_pH     Visual_Color   Visual_Marbling
 Min.   : 4.450   Min.   :5.460   Min.   :2.000   Min.   :1.000  
 1st Qu.: 6.581   1st Qu.:5.666   1st Qu.:2.500   1st Qu.:1.500  
 Median : 7.350   Median :5.755   Median :3.000   Median :1.500  
 Mean   : 7.457   Mean   :5.763   Mean   :3.284   Mean   :1.809  
 3rd Qu.: 8.287   3rd Qu.:5.830   3rd Qu.:4.000   3rd Qu.:2.000  
 Max.   :11.050   Max.   :6.370   Max.   :5.000   Max.   :4.000  
 NA's   :6        NA's   :6       NA's   :6       NA's   :6      
   Minolta_L*      Thaw_Purge        Instron_kg   
 Min.   :37.11   Min.   : 0.6135   Min.   :3.057  
 1st Qu.:43.49   1st Qu.: 3.1230   1st Qu.:4.313  
 Median :46.07   Median : 4.8480   Median :4.746  
 Mean   :46.07   Mean   : 5.1080   Mean   :4.790  
 3rd Qu.:48.82   3rd Qu.: 6.8257   3rd Qu.:5.216  
 Max.   :57.33   Max.   :14.4139   Max.   :6.767  
 NA's   :6       NA's   :6         NA's   :6