Load data

First we load the data and perform a few data object checks

rm(list=ls())

setwd("C:\\Users\\steibelj\\OneDrive\\Documents\\kaitlin")
load("master_data_new.Rdata")
ls()

## [1] "master_data_new"

class(master_data_new) # we are working with a data.frame

## [1] "data.frame"

dim(master_data_new) #why so many rows? (more than animals)

## [1] 8009   28

colnames(master_data_new)

##  [1] "Animal"       "Rep"          "Period"       "Measure"     
##  [5] "Mark"         "Litter"       "Current_pen"  "Current_room"
##  [9] "Sex"          "Weight"       "Obs"          "Front_ls"    
## [13] "Middle_ls"    "Rear_ls"      "Total_ls"     "Dam"         
## [17] "Sire"         "Birthdate"    "Nurs_wt_date" "Nurs_wt"     
## [21] "Fin_wt_date"  "Fin_wt"       "Sow_wt_date"  "Sow_wt"      
## [25] "Exit_wt_date" "Exit_wt"      "BF"           "LMA"

head(master_data_new)

##   Animal Rep   Period Measure Mark Litter Current_pen Current_room Sex
## 1      1   2  Nursery    Post    1     37         ML1           N2   g
## 2      1   2      Sow     Pre    1     37           4           F4   g
## 3      1   2  Nursery     Pre    1     37         ML1           N2   g
## 4      1   2 Finisher  Stable    4     37           5           F4   g
## 5      1   2  Nursery  Stable    2     37         ML1           N2   g
## 6      1   2 Finisher    Post    1     37           5           F4   g
##   Weight Obs Front_ls Middle_ls Rear_ls Total_ls   Dam       Sire
## 1   10.8  KW        6         2       2       10 Y1704 Bold Force
## 2  173.0  KW        6         8       4       18 Y1704 Bold Force
## 3   10.8  JS        1         0       0        1 Y1704 Bold Force
## 4   45.8  JS       19         7      10       36 Y1704 Bold Force
## 5   10.8  KW        6        11       7       24 Y1704 Bold Force
## 6   45.8  KW       63        74      28      165 Y1704 Bold Force
##    Birthdate Nurs_wt_date Nurs_wt Fin_wt_date Fin_wt Sow_wt_date Sow_wt
## 1 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 2 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 3 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 4 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 5 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
## 6 2014-03-05   2014-03-25    10.8  2014-05-12   45.8  2014-07-28    173
##   Exit_wt_date Exit_wt  BF   LMA
## 1   2014-09-02     234 1.2 52.48
## 2   2014-09-02     234 1.2 52.48
## 3   2014-09-02     234 1.2 52.48
## 4   2014-09-02     234 1.2 52.48
## 5   2014-09-02     234 1.2 52.48
## 6   2014-09-02     234 1.2 52.48

It is clear that each animal is logged several times (Period: three levels and Measure: three levels). Let’s extract only the rows corresponding to nursery and pre that will contain the largest number of animals and let’s only get the rows pertaining growth and carcass phenotypes (and relevant covariates/factors).

nurs<-master_data_new[master_data_new$Period=="Nursery",]
nurs_pre<-nurs[nurs$Measure=="Pre",]
dts<-nurs_pre[,c(1,2,5,6,9,18:28)]
head(dts)

##    Animal Rep Mark Litter Sex  Birthdate Nurs_wt_date Nurs_wt Fin_wt_date
## 3       1   2    1     37   g 2014-03-05   2014-03-25    10.8  2014-05-12
## 14      2   2    1     37   b 2014-03-05   2014-03-25    11.6  2014-05-12
## 23      3   2    1     37   g 2014-03-05   2014-03-25    12.6  2014-05-12
## 27      4   2    2     37   b 2014-03-05   2014-03-25    11.0  2014-05-12
## 38      5   2    2     37   g 2014-03-05   2014-03-25    12.3  2014-05-12
## 40      6   2    1     37   b 2014-03-05   2014-03-25     9.2        <NA>
##    Fin_wt Sow_wt_date Sow_wt Exit_wt_date Exit_wt  BF   LMA
## 3    45.8  2014-07-28    173   2014-09-02     234 1.2 52.48
## 14   46.2        <NA>     NA   2014-08-05     200 1.1 43.62
## 23   48.2  2014-07-28    162   2014-09-02     237 1.1 56.86
## 27   50.8        <NA>     NA   2014-08-05     218 1.2 41.82
## 38   49.0  2014-07-28    183   2014-09-02     259 1.4 49.92
## 40     NA        <NA>     NA         <NA>      NA  NA    NA

head(dts$Nurs_wt)

## [1] 10.8 11.6 12.6 11.0 12.3  9.2

head(dts$Nurs_wt_date)

## [1] "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25"
## [6] "2014-03-25"

head(dts$LMA)

## [1] 52.48 43.62 56.86 41.82 49.92    NA

head(table(dts$Exit_wt))

## 
##   139   173 173.5   174   176   177 
##     1     1     1     1     3     1

Type inconsistencies have been fixed.

Let’s proceed with a descriptive analysis.

rm(master_data_new,nurs,nurs_pre) #cleanup first
#missing data patterns
table(is.na(as.numeric(dts$Nurs_wt)),dts$Rep,dts$Sex) #nursery

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 60 70 70 70 65 70 70 70
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 60 70 70 70 68 70 70 70

table(is.na(as.numeric(dts$Fin_wt)),dts$Rep,dts$Sex) #finisher

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 55 69 69 70 64 67 69 67
##   TRUE   5  1  1  0  1  3  1  3
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 59 70 70 69 67 69 70 70
##   TRUE   1  0  0  1  1  1  0  0

table(is.na(as.numeric(dts$Sow_wt)),dts$Rep,dts$Sex) #sow

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE  0  0  0  0  0  0  0  0
##   TRUE  60 70 70 70 65 70 70 70
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 69 70 68 63 67 66 66
##   TRUE   2  1  0  2  5  3  4  4

There are no Barrows with “sow weights” A few more missing data patterns

table(is.na(as.numeric(dts$Exit_wt)),dts$Rep,dts$Sex)

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 69 67 62 65 67 63
##   TRUE   6  4  1  3  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 68 70 64 63 66 65 64
##   TRUE   2  2  0  6  5  4  5  6

table(is.na(as.numeric(dts$BF)),dts$Rep,dts$Sex)

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 68 66 62 65 67 63
##   TRUE   6  4  2  4  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 68 70 64 63 66 65 64
##   TRUE   2  2  0  6  5  4  5  6

table(is.na(as.numeric(dts$LMA)),dts$Rep,dts$Sex)

## , ,  = b
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 54 66 68 66 62 65 67 63
##   TRUE   6  4  2  4  3  5  3  7
## 
## , ,  = g
## 
##        
##          2  3  4  5  6  7  8  9
##   FALSE 58 68 70 64 63 66 65 64
##   TRUE   2  2  0  6  5  4  5  6

sow data from rep 5 is now included.We can look into the variability in age and days between different measurements.

age_nurs<-as.numeric(dts$Nurs_wt_date-dts$Birthdate)
by(age_nurs,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   21.00   22.00   21.58   22.00   24.00 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   22.00   23.00   24.00   23.86   24.00   26.00 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   20.00   20.00   20.05   21.00   21.00 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   22.00   23.00   24.00   24.24   25.00   30.00       1 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.00   18.00   20.00   20.26   22.00   26.00 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   22.00   23.00   22.66   23.00   25.00 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   19.00   20.00   19.44   20.00   21.00 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    17.0    18.0    20.0    19.8    21.0    22.0

Errors in the dates seem to have been fixed

nurs_fin_days<-as.numeric(dts$Fin_wt_date-dts$Nurs_wt_date)
by(nurs_fin_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      48      48      48      48      48      48       6 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      42      42      42      42      42      42 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      52      52      52      52      52      52 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      42      42      42      42      42      42       1 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      49      49      49      49      49      49 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      42      42      42      42      42      42       4 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      45      45      45      45      45      45       1 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      45      45      45      45      45      45       3

looks good: each rep processed in 1 day

sow_fin_days<-as.numeric(dts$Sow_wt_date-dts$Fin_wt_date)
by(sow_fin_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      77      77      77      77      77      77      61 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      92      92      92      92      92      92      70 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      77      77      77      77      77      77      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      85      85      85      85      85      85      72 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      78      78      78      78      78      78      70 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      78      78      78      78      78      78      73 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      87      87      87      87      87      87      74 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      71      71      71      71      71      71      74

sow_exit_days<-as.numeric(dts$Exit_wt_date-dts$Sow_wt_date)
by(sow_exit_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      36      36      36      36      36      36      62 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      28      28      28      28      28      28      72 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      29      29      29      29      29      29      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      23      23      23      23      23      23      76 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      30      30      30      30      30      30      70 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      28      28      28      28      28      28      74 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      32      32      32      32      32      32      75 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      35      35      35      35      35      35      76

nurs_exit_days<-as.numeric(dts$Exit_wt_date-dts$Nurs_wt_date)
by(nurs_exit_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   161.0   147.5   161.0   161.0       8 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   162.0   147.7   162.0   162.0       6 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   133.0   158.0   145.6   158.0   158.0       1 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   120.0   120.0   120.0   134.8   150.0   150.0      10 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   140.0   140.0   148.5   148.5   157.0   157.0       7 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   121.0   121.0   134.5   134.5   148.0   148.0       8 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   116.0   116.0   116.0   139.6   164.0   164.0       8 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   117.0   117.0   151.0   134.1   151.0   151.0      13

aggregate(nurs_exit_days~dts$Sex:dts$Rep,FUN=function(x) c(min(x,na.rm=T),mean(x,na.rm=T),max(x,na.rm=T)))

##    dts$Sex dts$Rep nurs_exit_days.1 nurs_exit_days.2 nurs_exit_days.3
## 1        b       2              133              133              133
## 2        g       2              161              161              161
## 3        b       3              133              133              133
## 4        g       3              162              162              162
## 5        b       4              133              133              133
## 6        g       4              158              158              158
## 7        b       5              120              120              120
## 8        g       5              150              150              150
## 9        b       6              140              140              140
## 10       g       6              157              157              157
## 11       b       7              121              121              121
## 12       g       7              148              148              148
## 13       b       8              116              116              116
## 14       g       8              164              164              164
## 15       b       9              117              117              117
## 16       g       9              151              151              151

Conclussion: if all periods are uniform in length within rep and sex, then we only need to include Rep and Sex effects to correct for those. In particular rep:sex interaction would take care of it all.Weight gains should be computed with the correct length of time and the rep:sex could be ignored if we assume that weight gain was constant.

Something to think about is differential weight gain with age and initial weight (compensatory growth).

nurs_exit_WG<-as.numeric(dts$Exit_wt)-as.numeric(dts$Nurs_wt)

plot(nurs_exit_WG~nurs_exit_days,
     col=c("red","black")[((dts$Sex=="b")+1)])

Graphic looks good now. no switched sex labels. Also notice that the WG is not linear with days nurs-exit.

sow_nurs_days<-as.numeric(dts$Sow_wt_date-dts$Nurs_wt_date)
by(sow_nurs_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     125     125     125     125     125     125      61 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     134     134     134     134     134     134      70 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     129     129     129     129     129     129      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     127     127     127     127     127     127      72 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     127     127     127     127     127     127      70 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     120     120     120     120     120     120      73 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     132     132     132     132     132     132      74 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     116     116     116     116     116     116      74

Compare now age at exit of barrows and age at beginning of sow stage

exit_birth_days<-as.numeric(dts$Exit_wt_date-dts$Birthdate)
by(exit_birth_days[dts$Sex=="b"],dts$Rep[dts$Sex=="b"],summary)

## dts$Rep[dts$Sex == "b"]: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   153.0   154.0   155.0   154.7   155.0   157.0       6 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     155     156     157     157     158     159       4 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     151     153     153     153     154     154       1 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   142.0   143.0   144.0   144.2   145.0   150.0       4 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     157     158     160     160     162     166       2 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   142.0   143.0   144.0   143.7   145.0   146.0       4 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   132.0   135.0   136.0   135.3   136.0   137.0       3 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "b"]: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   134.0   136.0   137.0   136.8   138.0   139.0       7

by(exit_birth_days[dts$Sex=="g"],dts$Rep[dts$Sex=="g"],summary)

## dts$Rep[dts$Sex == "g"]: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   180.0   182.0   182.0   182.4   183.0   184.0       2 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   184.0   185.0   186.0   185.8   186.0   188.0       2 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   176.0   178.0   178.0   178.1   179.0   179.0 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   172.0   174.0   174.0   174.3   175.0   180.0       6 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   174.0   177.0   177.0   177.6   179.0   183.0       5 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   169.0   170.0   171.0   170.6   171.0   173.0       4 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   180.0   183.0   184.0   183.5   184.0   185.0       5 
## -------------------------------------------------------- 
## dts$Rep[dts$Sex == "g"]: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     168     169     171     171     172     173       6

sow_birth_days<-as.numeric(dts$Sow_wt_date-dts$Birthdate)
by(sow_birth_days,dts$Rep,summary)

## dts$Rep: 2
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   144.0   146.0   146.0   146.4   147.0   148.0      61 
## -------------------------------------------------------- 
## dts$Rep: 3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   156.0   157.0   158.0   157.8   158.0   160.0      70 
## -------------------------------------------------------- 
## dts$Rep: 4
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   147.0   149.0   149.0   149.1   150.0   150.0      70 
## -------------------------------------------------------- 
## dts$Rep: 5
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   149.0   151.0   151.0   151.3   152.0   157.0      72 
## -------------------------------------------------------- 
## dts$Rep: 6
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   144.0   147.0   147.0   147.6   149.0   153.0      70 
## -------------------------------------------------------- 
## dts$Rep: 7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   141.0   142.0   143.0   142.6   143.0   145.0      73 
## -------------------------------------------------------- 
## dts$Rep: 8
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   148.0   151.0   152.0   151.5   152.0   153.0      74 
## -------------------------------------------------------- 
## dts$Rep: 9
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   133.0   134.0   136.0   135.9   137.0   138.0      74

dt<-c(exit_birth_days[dts$Sex=="b"],exit_birth_days[dts$Sex=="g"],
      sow_birth_days[dts$Sex=="g"])
rp<-c(dts$Rep[dts$Sex=="b"],dts$Rep[dts$Sex=="g"],dts$Rep[dts$Sex=="g"])
cls<-c(rep("barrow exit",length(exit_birth_days[dts$Sex=="b"])),
       rep("gilt exit",length(exit_birth_days[dts$Sex=="g"])),
       rep("gilt sow",length(exit_birth_days[dts$Sex=="g"])))

by(dt,cls,mean,na.rm=T)

## cls: barrow exit
## [1] 147.9397
## -------------------------------------------------------- 
## cls: gilt exit
## [1] 177.9073
## -------------------------------------------------------- 
## cls: gilt sow
## [1] 147.8847

As expected, the exit weight of barrows is very similar to weight of gilts at the begining of Sow period. Should be combine these twoo instead of just exit weight for barrows and sows? if we combine them, what to do with LMA and BF (only available at exit)?

Fit mixed models to exit WG,LMA and BF

library(gwaR)
library(regress)
library(qvalue)
mp<-read.table("SNP_Map.txt",header=T,sep="\t")
head(mp)

##   Index       Name Chromosome Position GenTrain.Score   SNP ILMN.Strand
## 1     1 1_10573221          1 10573221         0.8320 [T/C]         BOT
## 2     2 1_10673082          1 10673082         0.8076 [T/C]         BOT
## 3     3 1_10723065          1 10723065         0.8107 [A/G]         TOP
## 4     4 1_11337555          1 11337555         0.7925 [A/G]         TOP
## 5     5 1_11407894          1 11407894         0.8670 [A/G]         TOP
## 6     6 1_11426075          1 11426075         0.8675 [T/C]         BOT
##   Customer.Strand NormID
## 1             BOT      0
## 2             BOT      0
## 3             BOT      0
## 4             BOT      0
## 5             TOP      0
## 6             BOT      0

map<-mp[,c(3,4)]
rownames(map)<-mp$Name
colnames(map)<-c("chr","pos")
head(map)

##            chr      pos
## 1_10573221   1 10573221
## 1_10673082   1 10673082
## 1_10723065   1 10723065
## 1_11337555   1 11337555
## 1_11407894   1 11407894
## 1_11426075   1 11426075

load("Z_and_G_matrices.Rdata")

dim(G)

## [1] 1079 1079

dim(Z)

## [1]  1079 52925

dim(map)

## [1] 68516     2

sum(!colnames(Z)%in%rownames(map))

## [1] 0

map<-map[colnames(Z),]
dim(map)

## [1] 52925     2

to_include<-as.character(map$chr)%in%(1:18)
map<-map[to_include,]
Z<-Z[,to_include]
map$chr<-droplevels(map$chr)
#dts<-dts[rownames(dts)!="1158",]
rownames(dts)<-dts$"Animal" #check what is wrong with this variable name

dts$age_exit<-as.numeric(dts$Exit_wt_date-dts$Birthdate)
dts$Exit_wt<-as.numeric(as.character(dts$Exit_wt))
system.time({
  gb<-gblup("Exit_wt",dts,
      c(y~Sex+Rep+Sex:Rep+Nurs_wt+age_exit),
      G,pos=c(T,T))
  })

##    user  system elapsed 
##   16.42    0.31   17.97

print(gb)

## gblup analysis of trait: Exit_wt 
## 
## fixed effects equation:
## y ~ Sex + Rep + Nurs_wt + age_exit + Sex:Rep
## 
## random effects equation:
## ~G + In
## 
## log-likelihood: -3526.022 converged in: 5 iterations 
## 
## estimated fixed effects:
##                 Estimate   StdError    test.st      p.value
## (Intercept) -100.6174328 23.2186655 -4.3334718 1.467761e-05
## Sexg         -13.9966832  4.1992991 -3.3330998 8.588412e-04
## Rep            2.0630817  0.6948136  2.9692594 2.985185e-03
## Nurs_wt        3.4460607  0.2225656 15.4833472 0.000000e+00
## age_exit       1.7731740  0.1377284 12.8744250 0.000000e+00
## Sexg:Rep      -0.4328609  0.5591530 -0.7741368 4.388499e-01
## 
## estimated variance components:
##    Estimate StdError prop.var
## G  184.5469 29.46025 0.433276
## In 241.3869 18.23458 0.566724

sm<-summary(gb,ehat=T)
names(sm)

## [1] "uhat"     "name"     "ehat"     "llik"     "cycle"    "formula" 
## [7] "Vformula"

e<-sm$ehat-sm$uhat
plot(e~dts[rownames(e),"age_exit"])

varcomp(gb)

##    Estimate StdError prop.var         se
## G  184.5469 29.46025 0.433276 0.05885251
## In 241.3869 18.23458 0.566724 0.06285854

#perform GWA
system.time({
  GV<-gwas(gb,t(Z))
})

##    user  system elapsed 
##   64.11    0.82   76.55

summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.

##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"

#pvalues
pv<-getpvalue(GV,log.p = F)

#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)

qqgplot(pv)

#backfat

dts$BF<-as.numeric(as.character(dts$BF))
system.time({
  gb<-gblup("BF",dts,
      c(y~Sex+Rep+Sex:Rep),
      G,pos=c(T,T))
  })

##    user  system elapsed 
##   20.31    0.26   22.65

print(gb)

## gblup analysis of trait: BF 
## 
## fixed effects equation:
## y ~ Sex + Rep + Sex:Rep
## 
## random effects equation:
## ~G + In
## 
## log-likelihood: 543.5082 converged in: 6 iterations 
## 
## estimated fixed effects:
##                 Estimate    StdError    test.st     p.value
## (Intercept)  1.879118804 0.064917044 28.9464628 0.000000000
## Sexg        -0.157801773 0.056390696 -2.7983654 0.005136197
## Rep         -0.003244479 0.011398314 -0.2846455 0.775915723
## Sexg:Rep    -0.005134445 0.009419989 -0.5450585 0.585713257
## 
## estimated variance components:
##      Estimate    StdError prop.var
## G  0.10010376 0.012725427 0.597646
## In 0.06739298 0.006196953 0.402354

varcomp(gb)

##      Estimate    StdError prop.var         se
## G  0.10010376 0.012725427 0.597646 0.06020371
## In 0.06739298 0.006196953 0.402354 0.05390012

#perform GWA
system.time({
  GV<-gwas(gb,t(Z))
})

##    user  system elapsed 
##   58.23    0.93   61.48

summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.

##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"

#pvalues
pv<-getpvalue(GV,log.p = F)

#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)

qqgplot(pv)

LMA

dts$LMA<-as.numeric(as.character(dts$LMA))
system.time({
  gb<-gblup("LMA",dts,
      c(y~Sex+Rep+Sex:Rep),
      G,pos=c(T,T))
  })

##    user  system elapsed 
##   18.00    0.19   19.67

print(gb)

## gblup analysis of trait: LMA 
## 
## fixed effects equation:
## y ~ Sex + Rep + Sex:Rep
## 
## random effects equation:
## ~G + In
## 
## log-likelihood: -2181.516 converged in: 5 iterations 
## 
## estimated fixed effects:
##               Estimate  StdError   test.st      p.value
## (Intercept) 47.4745220 0.8542278 55.575950 0.0000000000
## Sexg         7.7204032 0.8218908  9.393466 0.0000000000
## Rep         -0.5205652 0.1489099 -3.495840 0.0004725728
## Sexg:Rep     0.2502317 0.1372969  1.822559 0.0683701898
## 
## estimated variance components:
##    Estimate StdError  prop.var
## G  13.53964 2.132432 0.4406418
## In 17.18745 1.306812 0.5593582

varcomp(gb)

##    Estimate StdError  prop.var         se
## G  13.53964 2.132432 0.4406418 0.05886784
## In 17.18745 1.306812 0.5593582 0.06243765

#perform GWA
system.time({
  GV<-gwas(gb,t(Z))
})

##    user  system elapsed 
##   61.19    0.69   66.06

summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.

##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"

#pvalues
pv<-getpvalue(GV,log.p = F)

#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)

qqgplot(pv)

There is no association signal for any component. But there is a very strong h2. Perhaps such estimated is inflated due to pen effects, etc, that are difficult to control for with multiple mixings. If that is the case, then the indirect genetics effects should absorb a large proportion of variance.

phenotype_carcass<-dts
save(phenotype_carcass,file="carcass.Rdata")

Exploratory data analysis of weight and carcass phenotypes

Juan P. Steibel

February 1, 2017

Load data

Fit mixed models to exit WG,LMA and BF

LMA