First we load the data and perform a few data object checks
rm(list=ls())
setwd("C:\\Users\\steibelj\\OneDrive\\Documents\\kaitlin")
load("master_data_new.Rdata")
ls()
## [1] "master_data_new"
class(master_data_new) # we are working with a data.frame
## [1] "data.frame"
dim(master_data_new) #why so many rows? (more than animals)
## [1] 8009 28
colnames(master_data_new)
## [1] "Animal" "Rep" "Period" "Measure"
## [5] "Mark" "Litter" "Current_pen" "Current_room"
## [9] "Sex" "Weight" "Obs" "Front_ls"
## [13] "Middle_ls" "Rear_ls" "Total_ls" "Dam"
## [17] "Sire" "Birthdate" "Nurs_wt_date" "Nurs_wt"
## [21] "Fin_wt_date" "Fin_wt" "Sow_wt_date" "Sow_wt"
## [25] "Exit_wt_date" "Exit_wt" "BF" "LMA"
head(master_data_new)
## Animal Rep Period Measure Mark Litter Current_pen Current_room Sex
## 1 1 2 Nursery Post 1 37 ML1 N2 g
## 2 1 2 Sow Pre 1 37 4 F4 g
## 3 1 2 Nursery Pre 1 37 ML1 N2 g
## 4 1 2 Finisher Stable 4 37 5 F4 g
## 5 1 2 Nursery Stable 2 37 ML1 N2 g
## 6 1 2 Finisher Post 1 37 5 F4 g
## Weight Obs Front_ls Middle_ls Rear_ls Total_ls Dam Sire
## 1 10.8 KW 6 2 2 10 Y1704 Bold Force
## 2 173.0 KW 6 8 4 18 Y1704 Bold Force
## 3 10.8 JS 1 0 0 1 Y1704 Bold Force
## 4 45.8 JS 19 7 10 36 Y1704 Bold Force
## 5 10.8 KW 6 11 7 24 Y1704 Bold Force
## 6 45.8 KW 63 74 28 165 Y1704 Bold Force
## Birthdate Nurs_wt_date Nurs_wt Fin_wt_date Fin_wt Sow_wt_date Sow_wt
## 1 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## 2 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## 3 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## 4 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## 5 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## 6 2014-03-05 2014-03-25 10.8 2014-05-12 45.8 2014-07-28 173
## Exit_wt_date Exit_wt BF LMA
## 1 2014-09-02 234 1.2 52.48
## 2 2014-09-02 234 1.2 52.48
## 3 2014-09-02 234 1.2 52.48
## 4 2014-09-02 234 1.2 52.48
## 5 2014-09-02 234 1.2 52.48
## 6 2014-09-02 234 1.2 52.48
It is clear that each animal is logged several times (Period: three levels and Measure: three levels). Let’s extract only the rows corresponding to nursery and pre that will contain the largest number of animals and let’s only get the rows pertaining growth and carcass phenotypes (and relevant covariates/factors).
nurs<-master_data_new[master_data_new$Period=="Nursery",]
nurs_pre<-nurs[nurs$Measure=="Pre",]
dts<-nurs_pre[,c(1,2,5,6,9,18:28)]
head(dts)
## Animal Rep Mark Litter Sex Birthdate Nurs_wt_date Nurs_wt Fin_wt_date
## 3 1 2 1 37 g 2014-03-05 2014-03-25 10.8 2014-05-12
## 14 2 2 1 37 b 2014-03-05 2014-03-25 11.6 2014-05-12
## 23 3 2 1 37 g 2014-03-05 2014-03-25 12.6 2014-05-12
## 27 4 2 2 37 b 2014-03-05 2014-03-25 11.0 2014-05-12
## 38 5 2 2 37 g 2014-03-05 2014-03-25 12.3 2014-05-12
## 40 6 2 1 37 b 2014-03-05 2014-03-25 9.2 <NA>
## Fin_wt Sow_wt_date Sow_wt Exit_wt_date Exit_wt BF LMA
## 3 45.8 2014-07-28 173 2014-09-02 234 1.2 52.48
## 14 46.2 <NA> NA 2014-08-05 200 1.1 43.62
## 23 48.2 2014-07-28 162 2014-09-02 237 1.1 56.86
## 27 50.8 <NA> NA 2014-08-05 218 1.2 41.82
## 38 49.0 2014-07-28 183 2014-09-02 259 1.4 49.92
## 40 NA <NA> NA <NA> NA NA NA
head(dts$Nurs_wt)
## [1] 10.8 11.6 12.6 11.0 12.3 9.2
head(dts$Nurs_wt_date)
## [1] "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25" "2014-03-25"
## [6] "2014-03-25"
head(dts$LMA)
## [1] 52.48 43.62 56.86 41.82 49.92 NA
head(table(dts$Exit_wt))
##
## 139 173 173.5 174 176 177
## 1 1 1 1 3 1
Type inconsistencies have been fixed.
Let’s proceed with a descriptive analysis.
rm(master_data_new,nurs,nurs_pre) #cleanup first
#missing data patterns
table(is.na(as.numeric(dts$Nurs_wt)),dts$Rep,dts$Sex) #nursery
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 60 70 70 70 65 70 70 70
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 60 70 70 70 68 70 70 70
table(is.na(as.numeric(dts$Fin_wt)),dts$Rep,dts$Sex) #finisher
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 55 69 69 70 64 67 69 67
## TRUE 5 1 1 0 1 3 1 3
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 59 70 70 69 67 69 70 70
## TRUE 1 0 0 1 1 1 0 0
table(is.na(as.numeric(dts$Sow_wt)),dts$Rep,dts$Sex) #sow
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 0 0 0 0 0 0 0 0
## TRUE 60 70 70 70 65 70 70 70
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 58 69 70 68 63 67 66 66
## TRUE 2 1 0 2 5 3 4 4
There are no Barrows with “sow weights” A few more missing data patterns
table(is.na(as.numeric(dts$Exit_wt)),dts$Rep,dts$Sex)
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 54 66 69 67 62 65 67 63
## TRUE 6 4 1 3 3 5 3 7
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 58 68 70 64 63 66 65 64
## TRUE 2 2 0 6 5 4 5 6
table(is.na(as.numeric(dts$BF)),dts$Rep,dts$Sex)
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 54 66 68 66 62 65 67 63
## TRUE 6 4 2 4 3 5 3 7
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 58 68 70 64 63 66 65 64
## TRUE 2 2 0 6 5 4 5 6
table(is.na(as.numeric(dts$LMA)),dts$Rep,dts$Sex)
## , , = b
##
##
## 2 3 4 5 6 7 8 9
## FALSE 54 66 68 66 62 65 67 63
## TRUE 6 4 2 4 3 5 3 7
##
## , , = g
##
##
## 2 3 4 5 6 7 8 9
## FALSE 58 68 70 64 63 66 65 64
## TRUE 2 2 0 6 5 4 5 6
sow data from rep 5 is now included.We can look into the variability in age and days between different measurements.
age_nurs<-as.numeric(dts$Nurs_wt_date-dts$Birthdate)
by(age_nurs,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 21.00 22.00 21.58 22.00 24.00
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22.00 23.00 24.00 23.86 24.00 26.00
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 20.00 20.00 20.05 21.00 21.00
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 22.00 23.00 24.00 24.24 25.00 30.00 1
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.00 18.00 20.00 20.26 22.00 26.00
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21.00 22.00 23.00 22.66 23.00 25.00
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.00 19.00 20.00 19.44 20.00 21.00
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.0 18.0 20.0 19.8 21.0 22.0
Errors in the dates seem to have been fixed
nurs_fin_days<-as.numeric(dts$Fin_wt_date-dts$Nurs_wt_date)
by(nurs_fin_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 48 48 48 48 48 48 6
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42 42 42 42 42 42
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 52 52 52 52 52 52
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 42 42 42 42 42 42 1
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 49 49 49 49 49 49
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 42 42 42 42 42 42 4
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 45 45 45 45 45 45 1
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 45 45 45 45 45 45 3
looks good: each rep processed in 1 day
sow_fin_days<-as.numeric(dts$Sow_wt_date-dts$Fin_wt_date)
by(sow_fin_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 77 77 77 77 77 77 61
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 92 92 92 92 92 92 70
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 77 77 77 77 77 77 70
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 85 85 85 85 85 85 72
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 78 78 78 78 78 78 70
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 78 78 78 78 78 78 73
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 87 87 87 87 87 87 74
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 71 71 71 71 71 71 74
sow_exit_days<-as.numeric(dts$Exit_wt_date-dts$Sow_wt_date)
by(sow_exit_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 36 36 36 36 36 36 62
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 28 28 28 28 28 28 72
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 29 29 29 29 29 29 70
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 23 23 23 23 23 23 76
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 30 30 30 30 30 30 70
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 28 28 28 28 28 28 74
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 32 32 32 32 32 32 75
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 35 35 35 35 35 35 76
nurs_exit_days<-as.numeric(dts$Exit_wt_date-dts$Nurs_wt_date)
by(nurs_exit_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 133.0 133.0 161.0 147.5 161.0 161.0 8
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 133.0 133.0 162.0 147.7 162.0 162.0 6
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 133.0 133.0 158.0 145.6 158.0 158.0 1
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 120.0 120.0 120.0 134.8 150.0 150.0 10
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 140.0 140.0 148.5 148.5 157.0 157.0 7
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 121.0 121.0 134.5 134.5 148.0 148.0 8
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 116.0 116.0 116.0 139.6 164.0 164.0 8
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 117.0 117.0 151.0 134.1 151.0 151.0 13
aggregate(nurs_exit_days~dts$Sex:dts$Rep,FUN=function(x) c(min(x,na.rm=T),mean(x,na.rm=T),max(x,na.rm=T)))
## dts$Sex dts$Rep nurs_exit_days.1 nurs_exit_days.2 nurs_exit_days.3
## 1 b 2 133 133 133
## 2 g 2 161 161 161
## 3 b 3 133 133 133
## 4 g 3 162 162 162
## 5 b 4 133 133 133
## 6 g 4 158 158 158
## 7 b 5 120 120 120
## 8 g 5 150 150 150
## 9 b 6 140 140 140
## 10 g 6 157 157 157
## 11 b 7 121 121 121
## 12 g 7 148 148 148
## 13 b 8 116 116 116
## 14 g 8 164 164 164
## 15 b 9 117 117 117
## 16 g 9 151 151 151
Conclussion: if all periods are uniform in length within rep and sex, then we only need to include Rep and Sex effects to correct for those. In particular rep:sex interaction would take care of it all.Weight gains should be computed with the correct length of time and the rep:sex could be ignored if we assume that weight gain was constant.
Something to think about is differential weight gain with age and initial weight (compensatory growth).
nurs_exit_WG<-as.numeric(dts$Exit_wt)-as.numeric(dts$Nurs_wt)
plot(nurs_exit_WG~nurs_exit_days,
col=c("red","black")[((dts$Sex=="b")+1)])
Graphic looks good now. no switched sex labels. Also notice that the WG is not linear with days nurs-exit.
sow_nurs_days<-as.numeric(dts$Sow_wt_date-dts$Nurs_wt_date)
by(sow_nurs_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 125 125 125 125 125 125 61
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 134 134 134 134 134 134 70
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 129 129 129 129 129 129 70
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 127 127 127 127 127 127 72
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 127 127 127 127 127 127 70
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 120 120 120 120 120 120 73
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 132 132 132 132 132 132 74
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 116 116 116 116 116 116 74
Compare now age at exit of barrows and age at beginning of sow stage
exit_birth_days<-as.numeric(dts$Exit_wt_date-dts$Birthdate)
by(exit_birth_days[dts$Sex=="b"],dts$Rep[dts$Sex=="b"],summary)
## dts$Rep[dts$Sex == "b"]: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 153.0 154.0 155.0 154.7 155.0 157.0 6
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 155 156 157 157 158 159 4
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 151 153 153 153 154 154 1
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 142.0 143.0 144.0 144.2 145.0 150.0 4
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 157 158 160 160 162 166 2
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 142.0 143.0 144.0 143.7 145.0 146.0 4
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 132.0 135.0 136.0 135.3 136.0 137.0 3
## --------------------------------------------------------
## dts$Rep[dts$Sex == "b"]: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 134.0 136.0 137.0 136.8 138.0 139.0 7
by(exit_birth_days[dts$Sex=="g"],dts$Rep[dts$Sex=="g"],summary)
## dts$Rep[dts$Sex == "g"]: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 180.0 182.0 182.0 182.4 183.0 184.0 2
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 184.0 185.0 186.0 185.8 186.0 188.0 2
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 176.0 178.0 178.0 178.1 179.0 179.0
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 172.0 174.0 174.0 174.3 175.0 180.0 6
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 174.0 177.0 177.0 177.6 179.0 183.0 5
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 169.0 170.0 171.0 170.6 171.0 173.0 4
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 180.0 183.0 184.0 183.5 184.0 185.0 5
## --------------------------------------------------------
## dts$Rep[dts$Sex == "g"]: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 168 169 171 171 172 173 6
sow_birth_days<-as.numeric(dts$Sow_wt_date-dts$Birthdate)
by(sow_birth_days,dts$Rep,summary)
## dts$Rep: 2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 144.0 146.0 146.0 146.4 147.0 148.0 61
## --------------------------------------------------------
## dts$Rep: 3
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 156.0 157.0 158.0 157.8 158.0 160.0 70
## --------------------------------------------------------
## dts$Rep: 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 147.0 149.0 149.0 149.1 150.0 150.0 70
## --------------------------------------------------------
## dts$Rep: 5
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 149.0 151.0 151.0 151.3 152.0 157.0 72
## --------------------------------------------------------
## dts$Rep: 6
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 144.0 147.0 147.0 147.6 149.0 153.0 70
## --------------------------------------------------------
## dts$Rep: 7
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 141.0 142.0 143.0 142.6 143.0 145.0 73
## --------------------------------------------------------
## dts$Rep: 8
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 148.0 151.0 152.0 151.5 152.0 153.0 74
## --------------------------------------------------------
## dts$Rep: 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 133.0 134.0 136.0 135.9 137.0 138.0 74
dt<-c(exit_birth_days[dts$Sex=="b"],exit_birth_days[dts$Sex=="g"],
sow_birth_days[dts$Sex=="g"])
rp<-c(dts$Rep[dts$Sex=="b"],dts$Rep[dts$Sex=="g"],dts$Rep[dts$Sex=="g"])
cls<-c(rep("barrow exit",length(exit_birth_days[dts$Sex=="b"])),
rep("gilt exit",length(exit_birth_days[dts$Sex=="g"])),
rep("gilt sow",length(exit_birth_days[dts$Sex=="g"])))
by(dt,cls,mean,na.rm=T)
## cls: barrow exit
## [1] 147.9397
## --------------------------------------------------------
## cls: gilt exit
## [1] 177.9073
## --------------------------------------------------------
## cls: gilt sow
## [1] 147.8847
As expected, the exit weight of barrows is very similar to weight of gilts at the begining of Sow period. Should be combine these twoo instead of just exit weight for barrows and sows? if we combine them, what to do with LMA and BF (only available at exit)?
library(gwaR)
library(regress)
library(qvalue)
mp<-read.table("SNP_Map.txt",header=T,sep="\t")
head(mp)
## Index Name Chromosome Position GenTrain.Score SNP ILMN.Strand
## 1 1 1_10573221 1 10573221 0.8320 [T/C] BOT
## 2 2 1_10673082 1 10673082 0.8076 [T/C] BOT
## 3 3 1_10723065 1 10723065 0.8107 [A/G] TOP
## 4 4 1_11337555 1 11337555 0.7925 [A/G] TOP
## 5 5 1_11407894 1 11407894 0.8670 [A/G] TOP
## 6 6 1_11426075 1 11426075 0.8675 [T/C] BOT
## Customer.Strand NormID
## 1 BOT 0
## 2 BOT 0
## 3 BOT 0
## 4 BOT 0
## 5 TOP 0
## 6 BOT 0
map<-mp[,c(3,4)]
rownames(map)<-mp$Name
colnames(map)<-c("chr","pos")
head(map)
## chr pos
## 1_10573221 1 10573221
## 1_10673082 1 10673082
## 1_10723065 1 10723065
## 1_11337555 1 11337555
## 1_11407894 1 11407894
## 1_11426075 1 11426075
load("Z_and_G_matrices.Rdata")
dim(G)
## [1] 1079 1079
dim(Z)
## [1] 1079 52925
dim(map)
## [1] 68516 2
sum(!colnames(Z)%in%rownames(map))
## [1] 0
map<-map[colnames(Z),]
dim(map)
## [1] 52925 2
to_include<-as.character(map$chr)%in%(1:18)
map<-map[to_include,]
Z<-Z[,to_include]
map$chr<-droplevels(map$chr)
#dts<-dts[rownames(dts)!="1158",]
rownames(dts)<-dts$"Animal" #check what is wrong with this variable name
dts$age_exit<-as.numeric(dts$Exit_wt_date-dts$Birthdate)
dts$Exit_wt<-as.numeric(as.character(dts$Exit_wt))
system.time({
gb<-gblup("Exit_wt",dts,
c(y~Sex+Rep+Sex:Rep+Nurs_wt+age_exit),
G,pos=c(T,T))
})
## user system elapsed
## 16.42 0.31 17.97
print(gb)
## gblup analysis of trait: Exit_wt
##
## fixed effects equation:
## y ~ Sex + Rep + Nurs_wt + age_exit + Sex:Rep
##
## random effects equation:
## ~G + In
##
## log-likelihood: -3526.022 converged in: 5 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) -100.6174328 23.2186655 -4.3334718 1.467761e-05
## Sexg -13.9966832 4.1992991 -3.3330998 8.588412e-04
## Rep 2.0630817 0.6948136 2.9692594 2.985185e-03
## Nurs_wt 3.4460607 0.2225656 15.4833472 0.000000e+00
## age_exit 1.7731740 0.1377284 12.8744250 0.000000e+00
## Sexg:Rep -0.4328609 0.5591530 -0.7741368 4.388499e-01
##
## estimated variance components:
## Estimate StdError prop.var
## G 184.5469 29.46025 0.433276
## In 241.3869 18.23458 0.566724
sm<-summary(gb,ehat=T)
names(sm)
## [1] "uhat" "name" "ehat" "llik" "cycle" "formula"
## [7] "Vformula"
e<-sm$ehat-sm$uhat
plot(e~dts[rownames(e),"age_exit"])
varcomp(gb)
## Estimate StdError prop.var se
## G 184.5469 29.46025 0.433276 0.05885251
## In 241.3869 18.23458 0.566724 0.06285854
#perform GWA
system.time({
GV<-gwas(gb,t(Z))
})
## user system elapsed
## 64.11 0.82 76.55
summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 0 0 0 0 0
## attr(,"class")
## [1] "summary.gwas"
#pvalues
pv<-getpvalue(GV,log.p = F)
#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)
qqgplot(pv)
#backfat
dts$BF<-as.numeric(as.character(dts$BF))
system.time({
gb<-gblup("BF",dts,
c(y~Sex+Rep+Sex:Rep),
G,pos=c(T,T))
})
## user system elapsed
## 20.31 0.26 22.65
print(gb)
## gblup analysis of trait: BF
##
## fixed effects equation:
## y ~ Sex + Rep + Sex:Rep
##
## random effects equation:
## ~G + In
##
## log-likelihood: 543.5082 converged in: 6 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 1.879118804 0.064917044 28.9464628 0.000000000
## Sexg -0.157801773 0.056390696 -2.7983654 0.005136197
## Rep -0.003244479 0.011398314 -0.2846455 0.775915723
## Sexg:Rep -0.005134445 0.009419989 -0.5450585 0.585713257
##
## estimated variance components:
## Estimate StdError prop.var
## G 0.10010376 0.012725427 0.597646
## In 0.06739298 0.006196953 0.402354
varcomp(gb)
## Estimate StdError prop.var se
## G 0.10010376 0.012725427 0.597646 0.06020371
## In 0.06739298 0.006196953 0.402354 0.05390012
#perform GWA
system.time({
GV<-gwas(gb,t(Z))
})
## user system elapsed
## 58.23 0.93 61.48
summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 0 0 0 0 0
## attr(,"class")
## [1] "summary.gwas"
#pvalues
pv<-getpvalue(GV,log.p = F)
#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)
qqgplot(pv)
dts$LMA<-as.numeric(as.character(dts$LMA))
system.time({
gb<-gblup("LMA",dts,
c(y~Sex+Rep+Sex:Rep),
G,pos=c(T,T))
})
## user system elapsed
## 18.00 0.19 19.67
print(gb)
## gblup analysis of trait: LMA
##
## fixed effects equation:
## y ~ Sex + Rep + Sex:Rep
##
## random effects equation:
## ~G + In
##
## log-likelihood: -2181.516 converged in: 5 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 47.4745220 0.8542278 55.575950 0.0000000000
## Sexg 7.7204032 0.8218908 9.393466 0.0000000000
## Rep -0.5205652 0.1489099 -3.495840 0.0004725728
## Sexg:Rep 0.2502317 0.1372969 1.822559 0.0683701898
##
## estimated variance components:
## Estimate StdError prop.var
## G 13.53964 2.132432 0.4406418
## In 17.18745 1.306812 0.5593582
varcomp(gb)
## Estimate StdError prop.var se
## G 13.53964 2.132432 0.4406418 0.05886784
## In 17.18745 1.306812 0.5593582 0.06243765
#perform GWA
system.time({
GV<-gwas(gb,t(Z))
})
## user system elapsed
## 61.19 0.69 66.06
summary(GV) #this is not working OK, I guess. Check and compare to getpvalue.
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 0 0 0 0 0
## attr(,"class")
## [1] "summary.gwas"
#pvalues
pv<-getpvalue(GV,log.p = F)
#manhattan plot and qqplots
manhattan_plot(pv,map, threshold = 0.000001)
qqgplot(pv)
There is no association signal for any component. But there is a very strong h2. Perhaps such estimated is inflated due to pen effects, etc, that are difficult to control for with multiple mixings. If that is the case, then the indirect genetics effects should absorb a large proportion of variance.
phenotype_carcass<-dts
save(phenotype_carcass,file="carcass.Rdata")