#Summary of phenodata
##setup and file input Let’s start by loading packages and data:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.0
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(readxl)
dts<-readxl::read_xlsx("D:/Selsby/Nursery_Finish_Data_112222 for Dr. Selsby.xlsx")
dts
## # A tibble: 175,081 × 22
## SiteName DNA_ID Sex Breed DOB Birth…¹ PigWeanDate
## <chr> <chr> <dbl> <dbl> <dttm> <chr> <dttm>
## 1 ATLAS 8560156… 2 200 2020-06-07 00:00:00 1.7689… 2020-06-25 00:00:00
## 2 ATLAS 8560155… 2 200 2020-06-07 00:00:00 1.7237 2020-06-25 00:00:00
## 3 ATLAS 8560149… 2 200 2020-06-05 00:00:00 1.1793 2020-06-25 00:00:00
## 4 ATLAS 8560147… 2 200 2020-06-05 00:00:00 1.3153… 2020-06-25 00:00:00
## 5 ATLAS 8560142… 2 200 2020-06-04 00:00:00 1.5875… 2020-06-25 00:00:00
## 6 ATLAS 8560141… 2 200 2020-06-04 00:00:00 1.4060… 2020-06-25 00:00:00
## 7 ATLAS 8560141… 2 200 2020-06-04 00:00:00 1.1339… 2020-06-25 00:00:00
## 8 ATLAS 8560140… 2 200 2020-06-04 00:00:00 1.7237 2020-06-25 00:00:00
## 9 ATLAS 8560137… 2 200 2020-06-03 00:00:00 1.4968… 2020-06-25 00:00:00
## 10 ATLAS 8560136… 2 200 2020-06-03 00:00:00 1.6782… 2020-06-25 00:00:00
## # … with 175,071 more rows, 15 more variables: PigWeanWeightKg <dbl>,
## # OnTestDate <dttm>, OnTestWeightKg <dbl>, BarnName <dbl>, PenName <dbl>,
## # Pencode <dbl>, OffTestDate <dttm>, OffTestWeightKg <dbl>, Backfat <dbl>,
## # LoinDepth <dbl>, IMF <dbl>, RemovalDate <lgl>, RemovalReason <lgl>,
## # CullDate <lgl>, CullReason <lgl>, and abbreviated variable name
## # ¹​BirthweightKg
Let’s first look into sample size, culled animals, etc
#sample size
nrow(dts)
## [1] 175081
#number of culled animals (tracked using the OffTestDate column)
sum(is.na(dts$OffTestDate))
## [1] 11265
dtc<-filter(dts,!is.na(OffTestDate))
#number of animals with complete records
nrow(dtc)
## [1] 163816
table(dtc$SiteName)
##
## ADAMS ATLAS BLUE HILL FAIRBURY HOUGH INSIGHT
## 16462 24402 22567 4580 12745 18606
## MANITOBA NUC MIDRIVER RICHTER WITTEN
## 10774 19972 16057 17651
We compute the difference between each date and DOB and also look into the length of the test period
dtc<-mutate(dtc,age_at_wean=round(PigWeanDate-DOB),age_on_test=round(OnTestDate-DOB),
age_off_test=round(OffTestDate-DOB),days_in_test=OffTestDate-OnTestDate,BirthweightKg=as.double(BirthweightKg),year=year(dtc$DOB))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
summarize(dtc,age_at_wean=mean(age_at_wean),age_on_test=mean(age_on_test),
age_on_test=mean(age_on_test),age_off_test=mean(age_off_test),days_in_test=mean(days_in_test))
## # A tibble: 1 × 4
## age_at_wean age_on_test age_off_test days_in_test
## <drtn> <drtn> <drtn> <drtn>
## 1 22.71697 days 77.11163 days 160.6323 days 83.52068 days
Interpretation: average age at weaning 23 days, animals spent about 83 days on test (entered test at 77 d.o and left at 161 days old approximately)
dtc<-mutate(dtc,ADG_wean=c(PigWeanWeightKg-BirthweightKg)/as.double(age_at_wean),ADG_test=(OffTestWeightKg-OnTestWeightKg)/as.double(round(days_in_test)),
month_birth=as.factor(month(DOB)),Sex=as.factor(Sex))
ggplot(dtc,aes(x=as.double(month_birth),y=ADG_test,color=Sex))+geom_point()+geom_smooth()+facet_grid(cols=vars(SiteName),rows=vars(year))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
## Warning: Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 11 rows containing missing values (`geom_point()`).
ggplot(dtc,aes(x=month_birth,y=ADG_test))+geom_boxplot()+facet_wrap(.~Sex)+facet_wrap(~year)+facet_grid(cols=vars(SiteName),rows=vars(year))
## Warning: Removed 11 rows containing non-finite values (`stat_boxplot()`).
ggplot(dtc,aes(x=as.double(month_birth),y=OffTestWeightKg,color=Sex))+geom_point()+geom_smooth()+facet_grid(cols=vars(SiteName),rows=vars(year))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.
## Warning: Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.
ggplot(dtc,aes(x=month_birth,y=OffTestWeightKg))+geom_boxplot()+facet_wrap(.~Sex)+facet_wrap(~year)+facet_grid(cols=vars(SiteName),rows=vars(year))