Load data

First we load the data and perform a few data object checks

rm(list=ls())
setwd("C:\\Users\\steibelj\\OneDrive\\Documents\\kaitlin")
load("master_data_new.Rdata")
ls()
## [1] "master_data_new"
class(master_data_new) # we are working with a data.frame
## [1] "data.frame"
dim(master_data_new) #why so many rows? (more than animals)
## [1] 7997   28
colnames(master_data_new) 
##  [1] "Animal"       "Rep"          "Period"       "Measure"     
##  [5] "Mark"         "Litter"       "Current_pen"  "Current_room"
##  [9] "Sex"          "Weight"       "Obs"          "Front_ls"    
## [13] "Middle_ls"    "Rear_ls"      "Total_ls"     "Dam"         
## [17] "Sire"         "Birthdate"    "Nurs_wt_date" "Nurs_wt"     
## [21] "Fin_wt_date"  "Fin_wt"       "Sow_wt_date"  "Sow_wt"      
## [25] "Exit_wt_date" "Exit_wt"      "BF"           "LMA"
head(master_data_new)
##   Animal Rep   Period Measure Mark Litter Current_pen Current_room Sex
## 1      1   2  Nursery    Post    1     37         ML1           N2   g
## 2      1   2      Sow     Pre    1     37           4           F4   g
## 3      1   2  Nursery     Pre    1     37         ML1           N2   g
## 4      1   2 Finisher  Stable    4     37           5           F4   g
## 5      1   2  Nursery  Stable    2     37         ML1           N2   g
## 6      1   2 Finisher    Post    1     37           5           F4   g
##   Weight Obs Front_ls Middle_ls Rear_ls Total_ls   Dam       Sire
## 1   10.8  KW        6         2       2       10 Y1704 Bold Force
## 2  173.0  KW        6         8       4       18 Y1704 Bold Force
## 3   10.8  JS        1         0       0        1 Y1704 Bold Force
## 4   45.8  JS       19         7      10       36 Y1704 Bold Force
## 5   10.8  KW        6        11       7       24 Y1704 Bold Force
## 6   45.8  KW       63        74      28      165 Y1704 Bold Force
##    Birthdate Nurs_wt_date Nurs_wt Fin_wt_date Fin_wt Sow_wt_date Sow_wt
## 1 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 2 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 3 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 4 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 5 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 6 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
##   Exit_wt_date Exit_wt  BF   LMA
## 1   2014-09-02     234 1.2 52.48
## 2   2014-09-02     234 1.2 52.48
## 3   2014-09-02     234 1.2 52.48
## 4   2014-09-02     234 1.2 52.48
## 5   2014-09-02     234 1.2 52.48
## 6   2014-09-02     234 1.2 52.48

It is clear that each animal is logged several times (Period: three levels and Measure: three levels). Let’s extract only the rows corresponding to nursery and pre that will contain the largest number of animals and let’s only get the rows pertaining growth and carcass phenotypes (and relevant covariates/factors).

nurs<-master_data_new[master_data_new$Period=="Nursery",]
nurs_pre<-nurs[nurs$Measure=="Pre",]
dts<-nurs_pre[,c(1,2,5,6,9,18:28)]
head(dts)
##    Animal Rep Mark Litter Sex  Birthdate Nurs_wt_date Nurs_wt Fin_wt_date
## 3       1   2    1     37   g 2014-03-05   2014-03-25    10.8  2014-05-12
## 14      2   2    1     37   b 2014-03-05   2014-03-25    11.6  2014-05-12
## 23      3   2    1     37   g 2014-03-05   2014-03-25    12.6  2014-05-12
## 27      4   2    2     37   b 2014-03-05   2014-03-25    11.0  2014-05-12
## 38      5   2    2     37   g 2014-03-05   2014-03-25    12.3  2014-05-12
## 46      7   2    1     37   g 2014-03-05   2014-03-25     8.4  2014-05-12
##    Fin_wt Sow_wt_date Sow_wt Exit_wt_date Exit_wt  BF   LMA
## 3    45.8  2014-07-28    173   2014-09-02     234 1.2 52.48
## 14   46.2        <NA>      .   2014-08-05     200 1.1 43.62
## 23   48.2  2014-07-28    162   2014-09-02     237 1.1 56.86
## 27   50.8        <NA>      .   2014-08-05     218 1.2 41.82
## 38     49  2014-07-28    183   2014-09-02     259 1.4 49.92
## 46   45.5  2014-07-28    175   2014-09-02     252 1.2  51.5
head(dts$Nurs_wt)
## [1] 10.8 11.6 12.6 11.0 12.3  8.4
head(dts$Nurs_wt_date)
## [1] "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25"
## [6] "2014-03-25"
head(dts$LMA)
## [1] "52.48" "43.62" "56.86" "41.82" "49.92" "51.5"
head(table(dts$Exit_wt))
## 
##     *     .   139   173 173.5   174 
##    26     2     1     1     1     1

There are clear type inconsistencies: Some of the numeric (real valued) variables are stored as text. missing values are “*" or “.” and not NA. This may be problematic down the road.

Let’s proceed with a descriptive analysis. We can solve these issues as we go, but let’s go with descriptive analyses

rm(master_data_new,nurs,nurs_pre) #cleanup first
#missing data patterns
table(is.na(as.numeric(dts$Nurs_wt)),dts$Rep,dts$Sex) #nursery
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 70 70 70 65 69 70 69
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 59 71 70 70 68 70 70 71
table(is.na(as.numeric(dts$Fin_wt)),dts$Rep,dts$Sex) #finisher
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 70 70 70 65 69 70 69
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 59 71 70 70 68 70 70 71
table(is.na(as.numeric(dts$Sow_wt)),dts$Rep,dts$Sex) #sow
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 70  0  0 63  0 70 69
##   TRUE   0  0 70 70  2 69  0  0
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 59 71 64 61 65 70 70 71
##   TRUE   0  0  6  9  3  0  0  0

This is extremely odd: Barrows with “sow weights”? mmm… A few more missing data patterns

table(is.na(as.numeric(dts$Exit_wt)),dts$Rep,dts$Sex) #sow
## Warning in table(is.na(as.numeric(dts$Exit_wt)), dts$Rep, dts$Sex): NAs
## introduced by coercion
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 69 67 62 64 67 62
##   TRUE   0  4  1  3  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 69 70  0 63 66 65 65
##   TRUE   1  2  0 70  5  4  5  6
table(is.na(as.numeric(dts$BF)),dts$Rep,dts$Sex) #sow
## Warning in table(is.na(as.numeric(dts$BF)), dts$Rep, dts$Sex): NAs
## introduced by coercion
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 68 66 62 64 67 62
##   TRUE   0  4  2  4  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 69 70  0 63 66 65 65
##   TRUE   1  2  0 70  5  4  5  6
table(is.na(as.numeric(dts$LMA)),dts$Rep,dts$Sex) #sow
## Warning in table(is.na(as.numeric(dts$LMA)), dts$Rep, dts$Sex): NAs
## introduced by coercion
## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 68 66 62 64 67 62
##   TRUE   0  4  2  4  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 69 70  0 63 66 65 65
##   TRUE   1  2  0 70  5  4  5  6

Whole sow data from rep 5 is missing.We can look into the variability in age and days between different measurements.

age_nurs<-as.numeric(dts$Nurs_wt_date-dts$Birthdate)
by(age_nurs,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   21.00   22.00   21.56   22.00   24.00 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -4360.0    23.0    24.0  -722.2    24.0    26.0 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   20.00   20.00   20.05   21.00   21.00 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   22.00   23.00   24.00   24.24   25.00   30.00       1 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.00   18.00   20.00   20.26   22.00   26.00 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   22.00   23.00   22.65   23.00   25.00 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -71.00   19.00   20.00   16.81   20.00   21.00 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    17.0    18.0    20.0    19.8    21.0    22.0

There are some errors in the dates.

head(dts[age_nurs<=0,])
##      Animal Rep Mark Litter Sex  Birthdate Nurs_wt_date Nurs_wt
## 1601    258   3    7     51   b 2026-05-17   2014-06-09    16.0
## 1611    260   3    7     51   b 2026-05-17   2014-06-09    18.6
## 1615    263   3    8     51   g 2026-05-17   2014-06-09    18.0
## 1623    264   3    8     51   b 2026-05-17   2014-06-09    18.1
## 1634    265   3    9     51   g 2026-05-17   2014-06-09    17.6
## 1640    266   3    8     51   b 2026-05-17   2014-06-09    15.6
##      Fin_wt_date Fin_wt Sow_wt_date Sow_wt Exit_wt_date Exit_wt  BF   LMA
## 1601  2014-07-21     47        <NA>      .   2014-10-20     248 1.7 53.67
## 1611  2014-07-21     64        <NA>      .   2014-10-20     262 2.8 45.17
## 1615  2014-07-21     57  2014-10-21    244   2014-11-18     274 1.4 64.44
## 1623  2014-07-21     55        <NA>      .   2014-10-20     230 2.2 47.51
## 1634  2014-07-21     61  2014-10-21    236   2014-11-18     268 1.9 53.49
## 1640  2014-07-21     42        <NA>      .   2014-10-20     239 1.9 50.59

Errors seem to be due to a wrong birthday date. This need to be thoroughtly checked (whole spreadsheet) and corrected ASAP.

nurs_fin_days<-as.numeric(dts$Fin_wt_date-dts$Nurs_wt_date)
by(nurs_fin_days,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      48      48      48      48      48      48 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      42      42      42      42      42      42 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      52      52      52      52      52      52 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      42      42      42      42      42      42       1 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      49      49      49      49      49      49 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      42      42      42      42      42      42 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      45      45      45      45      45      45 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      45      45      45      45      45      45

looks good: each rep processed in 1 day

sow_fin_days<-as.numeric(dts$Sow_wt_date-dts$Fin_wt_date)
by(sow_fin_days,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      77      77      77      77      77      77      54 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      92      92      92      92      92      92      70 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      77      77      77      77      77      77      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      85      85      85      85      85      85      72 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      78      78      78      78      78      78      68 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      78      78      78      78      78      78      69 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      87      87      87      87      87      87      70 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      71      71      71      71      71      71      74
sow_exit_days<-as.numeric(dts$Exit_wt_date-dts$Sow_wt_date)
by(sow_exit_days,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      36      36      36      36      36      36      55 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      28      28      28      28      28      28      72 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      29      29      29      29      29      29      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      NA      NA      NA     NaN      NA      NA     140 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      30      30      30      30      30      30      70 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      28      28      28      28      28      28      73 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      32      32      32      32      32      32      75 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      35      35      35      35      35      35      76
nurs_exit_days<-as.numeric(dts$Exit_wt_date-dts$Nurs_wt_date)
by(nurs_exit_days,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   161.0   147.5   161.0   161.0       1 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   162.0   147.8   162.0   162.0       6 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   158.0   145.6   158.0   158.0       1 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     120     120     120     120     120     120      74 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   140.0   140.0   148.5   148.5   157.0   157.0       7 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   121.0   121.0   134.5   134.5   148.0   148.0       7 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   116.0   116.0   116.0   139.6   164.0   164.0       8 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   117.0   117.0   151.0   134.1   151.0   151.0      13
aggregate(nurs_exit_days~dts$Sex:dts$Rep,FUN=function(x) c(min(x,na.rm=T),mean(x,na.rm=T),max(x,na.rm=T)))
##    dts$Sex dts$Rep nurs_exit_days.1 nurs_exit_days.2 nurs_exit_days.3
## 1        b       2         133.0000         133.0000         133.0000
## 2        g       2         161.0000         161.0000         161.0000
## 3        b       3         133.0000         133.0000         133.0000
## 4        g       3         162.0000         162.0000         162.0000
## 5        b       4         133.0000         133.0000         133.0000
## 6        g       4         158.0000         158.0000         158.0000
## 7        b       5         120.0000         120.0000         120.0000
## 8        b       6         140.0000         140.0000         140.0000
## 9        g       6         157.0000         157.0000         157.0000
## 10       b       7         121.0000         121.0000         121.0000
## 11       g       7         148.0000         148.0000         148.0000
## 12       b       8         116.0000         116.0000         116.0000
## 13       g       8         164.0000         164.0000         164.0000
## 14       b       9         117.0000         117.5484         151.0000
## 15       g       9         117.0000         149.9538         151.0000

Conclussion: if all periods are uniform in length within rep and sex, then we only need to include Rep and Sex effects to correct for those. In particular rep:sex interaction would take care of it all.Weight gains should be computed with the correct length of time and the rep:sex could be ignored if we assume that weight gain was constant.

Something to think about is differential weight gain with age and initial weight (compensatory growth).

nurs_exit_WG<-as.numeric(dts$Exit_wt)-as.numeric(dts$Nurs_wt)
## Warning: NAs introduced by coercion
plot(nurs_exit_WG~nurs_exit_days,
     col=c("red","black")[((dts$Sex=="b")+1)])

This is very suspicious. Are there switched sex labels? I think so. Also notice that the WG is not linear with days nurs-exit.

sow_nurs_days<-as.numeric(dts$Sow_wt_date-dts$Nurs_wt_date)
by(sow_nurs_days,dts$Rep,summary)
## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     125     125     125     125     125     125      54 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     134     134     134     134     134     134      70 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     129     129     129     129     129     129      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     127     127     127     127     127     127      72 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     127     127     127     127     127     127      68 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     120     120     120     120     120     120      69 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     132     132     132     132     132     132      70 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     116     116     116     116     116     116      74