#Summary of phenodata

##setup and file input Let’s start by loading packages and data:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(lubridate)
## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)
library(readxl)
dts<-readxl::read_xlsx("D:/Selsby/Nursery_Finish_Data_112222 for Dr. Selsby.xlsx")
dts
## # A tibble: 175,081 × 22
##    SiteName DNA_ID     Sex Breed DOB                 Birth…¹ PigWeanDate        
##    <chr>    <chr>    <dbl> <dbl> <dttm>              <chr>   <dttm>             
##  1 ATLAS    8560156…     2   200 2020-06-07 00:00:00 1.7689… 2020-06-25 00:00:00
##  2 ATLAS    8560155…     2   200 2020-06-07 00:00:00 1.7237  2020-06-25 00:00:00
##  3 ATLAS    8560149…     2   200 2020-06-05 00:00:00 1.1793  2020-06-25 00:00:00
##  4 ATLAS    8560147…     2   200 2020-06-05 00:00:00 1.3153… 2020-06-25 00:00:00
##  5 ATLAS    8560142…     2   200 2020-06-04 00:00:00 1.5875… 2020-06-25 00:00:00
##  6 ATLAS    8560141…     2   200 2020-06-04 00:00:00 1.4060… 2020-06-25 00:00:00
##  7 ATLAS    8560141…     2   200 2020-06-04 00:00:00 1.1339… 2020-06-25 00:00:00
##  8 ATLAS    8560140…     2   200 2020-06-04 00:00:00 1.7237  2020-06-25 00:00:00
##  9 ATLAS    8560137…     2   200 2020-06-03 00:00:00 1.4968… 2020-06-25 00:00:00
## 10 ATLAS    8560136…     2   200 2020-06-03 00:00:00 1.6782… 2020-06-25 00:00:00
## # … with 175,071 more rows, 15 more variables: PigWeanWeightKg <dbl>,
## #   OnTestDate <dttm>, OnTestWeightKg <dbl>, BarnName <dbl>, PenName <dbl>,
## #   Pencode <dbl>, OffTestDate <dttm>, OffTestWeightKg <dbl>, Backfat <dbl>,
## #   LoinDepth <dbl>, IMF <dbl>, RemovalDate <lgl>, RemovalReason <lgl>,
## #   CullDate <lgl>, CullReason <lgl>, and abbreviated variable name
## #   ¹​BirthweightKg

Basic summaries

Let’s first look into sample size, culled animals, etc

#sample size
nrow(dts)
## [1] 175081
#number of culled animals (tracked using the OffTestDate column)
sum(is.na(dts$OffTestDate))
## [1] 11265
dtc<-filter(dts,!is.na(OffTestDate))
#number of animals with complete records
nrow(dtc)
## [1] 163816
table(dtc$SiteName)
## 
##        ADAMS        ATLAS    BLUE HILL     FAIRBURY        HOUGH      INSIGHT 
##        16462        24402        22567         4580        12745        18606 
## MANITOBA NUC     MIDRIVER      RICHTER       WITTEN 
##        10774        19972        16057        17651

summary of days in each state

We compute the difference between each date and DOB and also look into the length of the test period

dtc<-mutate(dtc,age_at_wean=round(PigWeanDate-DOB),age_on_test=round(OnTestDate-DOB),
            age_off_test=round(OffTestDate-DOB),days_in_test=OffTestDate-OnTestDate,BirthweightKg=as.double(BirthweightKg),year=year(dtc$DOB))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
summarize(dtc,age_at_wean=mean(age_at_wean),age_on_test=mean(age_on_test),
          age_on_test=mean(age_on_test),age_off_test=mean(age_off_test),days_in_test=mean(days_in_test))
## # A tibble: 1 × 4
##   age_at_wean   age_on_test   age_off_test  days_in_test 
##   <drtn>        <drtn>        <drtn>        <drtn>       
## 1 22.71697 days 77.11163 days 160.6323 days 83.52068 days

Interpretation: average age at weaning 23 days, animals spent about 83 days on test (entered test at 77 d.o and left at 161 days old approximately)

weight gains and a rough graphic summary

dtc<-mutate(dtc,ADG_wean=c(PigWeanWeightKg-BirthweightKg)/as.double(age_at_wean),ADG_test=(OffTestWeightKg-OnTestWeightKg)/as.double(round(days_in_test)),
            month_birth=as.factor(month(DOB)),Sex=as.factor(Sex))
ggplot(dtc,aes(x=as.double(month_birth),y=ADG_test,color=Sex))+geom_point()+geom_smooth()+facet_grid(cols=vars(SiteName),rows=vars(year))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 11 rows containing non-finite values (`stat_smooth()`).
## Warning: Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 11 rows containing missing values (`geom_point()`).

ggplot(dtc,aes(x=month_birth,y=ADG_test))+geom_boxplot()+facet_wrap(.~Sex)+facet_wrap(~year)+facet_grid(cols=vars(SiteName),rows=vars(year))
## Warning: Removed 11 rows containing non-finite values (`stat_boxplot()`).

ggplot(dtc,aes(x=as.double(month_birth),y=OffTestWeightKg,color=Sex))+geom_point()+geom_smooth()+facet_grid(cols=vars(SiteName),rows=vars(year))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.
## Warning: Computation failed in `stat_smooth()`
## Computation failed in `stat_smooth()`
## Caused by error in `smooth.construct.cr.smooth.spec()`:
## ! x has insufficient unique values to support 10 knots: reduce k.

ggplot(dtc,aes(x=month_birth,y=OffTestWeightKg))+geom_boxplot()+facet_wrap(.~Sex)+facet_wrap(~year)+facet_grid(cols=vars(SiteName),rows=vars(year))