Sensor data summary

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(readr)
library(tidyr)
library(plotrix)
Warning: package 'plotrix' was built under R version 4.4.3

Loading in data

# Read the sensor export with an explicit type for every column, so readr does
# not have to guess column types from the data.
sensor <- read_csv(
  file = "Z:/Isaac/Visual Features/1-5/BioCV_Cohorts_6-7.csv",
  col_types = cols(
    # Identifier columns are read as factors.
    Id_Bio_Cohort  = col_factor(),
    Bio_ID         = col_factor(),
    Cohort         = col_factor(),
    # These dates are written month/day/four-digit-year in the file.
    Date           = col_date(format = "%m/%d/%Y"),
    Loading_date   = col_date(format = "%m/%d/%Y"),
    Loading_parity = col_integer(),
    Sow_ID         = col_factor(),
    Time           = col_character(),
    Weaning_date   = col_date(format = "%m/%d/%Y"),
    # Sensor readings and their timestamp.
    c              = col_double(),
    sensor_ts      = col_datetime(),
    x              = col_double(),
    y              = col_double(),
    z              = col_double(),
    format         = col_character(),
    # No explicit format here: these rely on readr's default date parsing.
    Entry_Far_Room = col_date(),
    FD_Gestal      = col_date(),
    FDM1           = col_date(),
    FD_Porcitec    = col_date(),
    Wean_Porcitec  = col_date(),
    FARRFAIL       = col_integer(),
    LACTFAIL       = col_integer()
  )
)

# Production records are read with base read.csv(), so prod_rec is a plain
# data.frame whose date columns remain character strings.
prod_rec <- read.csv(
  file = "Z:/Isaac/Visual Features/1-5/Production_cohorts_6_7.csv"
)

Inspecting the structure of the data

# Show the dimensions, column types and readr column spec of the sensor table.
str(sensor)
spc_tbl_ [2,793,343 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Id_Bio_Cohort : Factor w/ 191 levels "34009_6_1024",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Bio_ID        : Factor w/ 111 levels "1024","1006",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Cohort        : Factor w/ 2 levels "6","7": 1 1 1 1 1 1 1 1 1 1 ...
 $ Date          : Date[1:2793343], format: "2025-03-30" "2025-03-30" ...
 $ Loading_date  : Date[1:2793343], format: "2025-03-30" "2025-03-30" ...
 $ Loading_parity: int [1:2793343] NA NA NA NA NA NA NA NA NA NA ...
 $ Sow_ID        : Factor w/ 191 levels "34009","28352",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Time          : chr [1:2793343] "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
 $ Weaning_date  : Date[1:2793343], format: "2025-04-25" "2025-04-25" ...
 $ c             : num [1:2793343] 30.3 30.3 30.3 30.3 30.4 ...
 $ sensor_ts     : POSIXct[1:2793343], format: "2025-03-30 00:00:00" "2025-03-30 00:01:00" ...
 $ x             : num [1:2793343] 0.34 0.338 0.338 0.332 0.332 ...
 $ y             : num [1:2793343] -0.328 -0.336 -0.336 -0.363 -0.363 ...
 $ z             : num [1:2793343] 0.877 0.875 0.875 0.879 0.879 ...
 $ format        : chr [1:2793343] "%Y/%m/%d %H:%M" "%Y/%m/%d %H:%M" "%Y/%m/%d %H:%M" "%Y/%m/%d %H:%M" ...
 $ Entry_Far_Room: Date[1:2793343], format: "2025-03-30" "2025-03-30" ...
 $ FD_Gestal     : Date[1:2793343], format: "2025-04-02" "2025-04-02" ...
 $ FDM1          : Date[1:2793343], format: "2025-04-01" "2025-04-01" ...
 $ FD_Porcitec   : Date[1:2793343], format: "2025-04-02" "2025-04-02" ...
 $ Wean_Porcitec : Date[1:2793343], format: "2025-04-21" "2025-04-21" ...
 $ FARRFAIL      : int [1:2793343] 0 0 0 0 0 0 0 0 0 0 ...
 $ LACTFAIL      : int [1:2793343] 0 0 0 0 0 0 0 0 0 0 ...
 - attr(*, "spec")=
  .. cols(
  ..   Id_Bio_Cohort = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
  ..   Bio_ID = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
  ..   Cohort = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
  ..   Date = col_date(format = "%m/%d/%Y"),
  ..   Loading_date = col_date(format = "%m/%d/%Y"),
  ..   Loading_parity = col_integer(),
  ..   Sow_ID = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
  ..   Time = col_character(),
  ..   Weaning_date = col_date(format = "%m/%d/%Y"),
  ..   c = col_double(),
  ..   sensor_ts = col_datetime(format = ""),
  ..   x = col_double(),
  ..   y = col_double(),
  ..   z = col_double(),
  ..   format = col_character(),
  ..   Entry_Far_Room = col_date(format = ""),
  ..   FD_Gestal = col_date(format = ""),
  ..   FDM1 = col_date(format = ""),
  ..   FD_Porcitec = col_date(format = ""),
  ..   Wean_Porcitec = col_date(format = ""),
  ..   FARRFAIL = col_integer(),
  ..   LACTFAIL = col_integer()
  .. )
 - attr(*, "problems")=<externalptr> 
# Show the dimensions and column types of the production records.
str(prod_rec)
'data.frame':   200 obs. of  105 variables:
 $ Id_Bio_Cohort : chr  "29963_6_1072" "24555_6_15" "29823_6_20" "28475_6_33" ...
 $ ID            : int  29963 24555 29823 28475 28368 33390 34023 28225 33954 25503 ...
 $ Cohort        : int  6 6 6 6 6 6 6 6 6 6 ...
 $ Bio_ID        : int  1072 15 20 33 42 44 1001 1007 1013 1026 ...
 $ source_file_x : chr  "24_28_00_00_02_98" "24_28_00_00_02_12" "24_28_00_00_05_91" "24_28_00_00_03_81" ...
 $ Entry_Far_Room: chr  "3/30/25" "3/29/25" "3/29/25" "3/29/25" ...
 $ FD_Gestal     : chr  "4/4/25" "4/2/25" "4/1/25" "4/2/25" ...
 $ FDM1          : chr  "4/3/25" "4/1/25" "3/31/25" "4/1/25" ...
 $ FD_Porcitec   : chr  "11/11/24" "11/5/24" "4/1/25" "11/7/24" ...
 $ Wean_Porcitec : chr  "4/27/25" "4/29/25" "4/21/25" "4/27/25" ...
 $ PARITY        : int  0 5 1 1 2 2 3 1 2 4 ...
 $ TOTALBORN     : int  14 12 20 18 14 14 21 18 22 22 ...
 $ LIVEBORN      : int  14 12 19 17 12 12 16 17 18 17 ...
 $ STILLBORN     : int  0 0 1 1 2 2 1 0 0 2 ...
 $ MUMMIES       : int  0 0 0 0 0 0 4 1 4 3 ...
 $ NURSEOFF      : int  0 0 0 0 0 0 0 0 16 0 ...
 $ NURSEON       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ FOSTEROFF     : int  0 0 5 1 0 0 1 1 3 3 ...
 $ FOSTERON      : int  0 4 0 0 3 3 0 0 0 0 ...
 $ WEAN1NUM      : int  14 12 14 13 13 14 13 13 0 12 ...
 $ WEANFSTSERV   : int  3 4 6 4 6 4 6 4 NA 4 ...
 $ REMOVEDATE    : chr  "" "" "" "" ...
 $ REMOVEREASON  : chr  "" "" "" "" ...
 $ CAL_LACT_DAY  : int  23 27 20 25 19 24 19 22 13 26 ...
 $ FARRFAIL      : int  0 0 0 0 0 0 0 0 0 0 ...
 $ LACTFAIL      : int  0 0 0 0 0 0 0 0 1 0 ...
 $ Lact_day_N3   : int  -3 -3 NA -3 -3 -3 -3 -3 -3 -3 ...
 $ Water_L_N3    : num  6.9 0 NA 8.9 8.73 ...
 $ Consumed_N3   : num  5.66 4.35 NA 4.4 4.41 4.32 4.64 4.23 4.3 4.37 ...
 $ Lact_day_N2   : int  -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 ...
 $ Water_L_N2    : num  13.27 0 19.69 10.01 7.29 ...
 $ Consumed_N2   : num  5.87 4.43 5.81 4.38 4.43 4.29 4.3 4.09 4.29 4.37 ...
 $ Lact_day_N1   : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
 $ Water_L_N1    : num  10 0 18.8 17.7 12.3 ...
 $ Consumed_N1   : num  5.42 4.63 5.93 4.52 4.4 3.13 4.44 1.93 4.53 4.38 ...
 $ Lact_day_0    : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Water_L_0     : num  10.13 0 22.47 11.77 8.24 ...
 $ Consumed_0    : num  6.96 4.72 6.53 5.23 5 4.49 3.83 1.37 4.84 6.62 ...
 $ Lact_day_1    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Water_L_1     : num  12.9 0 25 13.7 14.1 ...
 $ Consumed_1    : num  10.99 11.68 10.82 7.78 9.64 ...
 $ Lact_day_2    : int  2 2 2 2 2 2 2 2 2 2 ...
 $ Water_L_2     : num  13.6 0 32.5 17.4 11.7 ...
 $ Consumed_2    : num  11.7 15.3 13.7 12.8 10.9 ...
 $ Lact_day_3    : int  3 3 3 3 3 3 3 3 3 3 ...
 $ Water_L_3     : num  16.9 0 41.3 23 21.4 ...
 $ Consumed_3    : num  17.1 17.7 18.2 17.2 14.8 ...
 $ Lact_day_4    : int  4 4 4 4 4 4 4 4 4 4 ...
 $ Water_L_4     : num  19.5 0 42.1 26.2 18.2 ...
 $ Consumed_4    : num  17.3 16.7 19.4 19.9 12.9 ...
 $ Lact_day_5    : int  5 5 5 5 5 5 5 5 5 5 ...
 $ Water_L_5     : num  21.6 0 42.8 22.2 24.1 ...
 $ Consumed_5    : num  16.9 17.9 15 14.7 13.6 ...
 $ Lact_day_6    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ Water_L_6     : num  21.3 0 39.2 28.5 18.7 ...
 $ Consumed_6    : num  20.1 19.9 14.9 18.4 13.8 ...
 $ Lact_day_7    : int  7 7 7 7 7 7 7 7 7 7 ...
 $ Water_L_7     : num  25.8 0 45.7 36.4 24.9 ...
 $ Consumed_7    : num  20.6 20.9 23.9 22.5 17.2 ...
 $ Lact_day_8    : int  8 8 8 8 8 8 8 8 8 8 ...
 $ Water_L_8     : num  26.7 0 49 41 22.6 ...
 $ Consumed_8    : num  23.9 23.7 22.6 25.9 19.9 ...
 $ Lact_day_9    : int  9 9 9 9 9 9 9 9 9 9 ...
 $ Water_L_9     : num  27.2 0 43.5 41.7 22.8 ...
 $ Consumed_9    : num  19.7 23.1 21.3 24.7 15.2 ...
 $ Lact_day_10   : int  10 10 10 10 10 10 10 10 10 10 ...
 $ Water_L_10    : num  23.8 0 41.8 53.9 30.4 ...
 $ Consumed_10   : num  21.4 25.2 25.2 25.6 18.6 ...
 $ Lact_day_11   : int  11 11 11 11 11 11 11 11 11 11 ...
 $ Water_L_11    : num  28.4 0 49.6 58.1 27.5 ...
 $ Consumed_11   : num  21.4 19.7 21.4 22.5 15.2 ...
 $ Lact_day_12   : int  12 12 12 12 12 12 12 12 12 12 ...
 $ Water_L_12    : num  31 0 50.3 49.9 28.5 ...
 $ Consumed_12   : num  24.4 23.5 15.8 21.1 14.3 ...
 $ Lact_day_13   : int  13 13 13 13 13 13 13 13 13 13 ...
 $ Water_L_13    : num  23.4 0 42.5 48.9 26.6 ...
 $ Consumed_13   : num  19.7 25.4 18.9 23.4 18.6 ...
 $ Lact_day_14   : int  14 14 14 14 14 14 14 14 14 14 ...
 $ Water_L_14    : num  41.1 0 41.4 49.9 27.1 ...
 $ Consumed_14   : num  24.3 18.8 25 26 17.8 ...
 $ Lact_day_15   : int  15 15 15 15 15 15 15 15 15 15 ...
 $ Water_L_15    : num  25.5 0 47.4 44.3 26.9 ...
 $ Consumed_15   : num  28.8 19.5 18.9 21.3 18 ...
 $ Lact_day_16   : int  16 16 16 16 16 16 16 16 16 16 ...
 $ Water_L_16    : num  28.6 0 40.7 54.2 19.2 ...
 $ Consumed_16   : num  22.1 23.4 20.2 18.9 13.1 ...
 $ Lact_day_17   : int  NA 17 17 17 17 17 17 17 17 17 ...
 $ Water_L_17    : num  NA 0 86.3 35.7 25.7 ...
 $ Consumed_17   : num  NA 26.5 23 24.3 14.4 ...
 $ Lact_day_18   : int  NA 18 18 18 18 NA 18 NA 18 18 ...
 $ Water_L_18    : num  NA 0 56.5 44.2 28.2 ...
 $ Consumed_18   : num  NA 22.5 23.1 25.6 19.3 ...
 $ Lact_day_19   : int  NA NA 19 NA NA NA NA NA NA NA ...
 $ Water_L_19    : num  NA NA 54.4 NA NA ...
 $ Consumed_19   : num  NA NA 21.2 NA NA ...
 $ Lact_day_20   : int  NA NA NA NA NA NA NA NA NA NA ...
 $ Water_L_20    : num  NA NA NA NA NA NA NA NA NA NA ...
 $ Consumed_20   : num  NA NA NA NA NA NA NA NA NA NA ...
 $ Lact_day_21   : int  NA NA NA NA NA NA NA NA NA NA ...
  [list output truncated]

counting unique sow ids

# Number of distinct sow IDs present in the sensor table.
n_distinct(sensor$Sow_ID)
[1] 191

Focusing on sow ID, cohort, timestamp, FD_Gestal, and the c, x, y, z readings

# Reduce the sensor table to the sow/cohort identifiers, the timestamp,
# FD_Gestal and the four measurement columns, in that order.
focused <- select(
  sensor,
  all_of(c("Sow_ID", "Cohort", "sensor_ts", "FD_Gestal", "c", "x", "y", "z"))
)

# Confirm which columns were kept.
names(focused)
[1] "Sow_ID"    "Cohort"    "sensor_ts" "FD_Gestal" "c"         "x"        
[7] "y"         "z"        

Aggregating into 10-minute time windows

# Tag every reading with the start of the 10-minute interval it falls in.
focused10 <- mutate(
  focused,
  window10 = floor_date(sensor_ts, unit = "10 minutes")
)
library(dplyr)

# Collapse to one row per sow and 10-minute window. For each of x, y, z and c
# the mean and variance are computed with NAs removed; the output columns are
# named mean_<col> / var_<col>, column by column. n_obs is the number of rows
# in the window (including rows with NA readings) and FD_Gestal is carried
# through as the first value seen in the window.
df_window_summary <- focused10 %>%
  group_by(Sow_ID, window10) %>%
  summarise(
    across(
      all_of(c("x", "y", "z", "c")),
      list(
        mean = function(v) mean(v, na.rm = TRUE),
        var  = function(v) var(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    ),
    n_obs     = n(),
    FD_Gestal = first(FD_Gestal),
    .groups   = "drop"
  )

Nesting by sow and summarising each sow's recording period

# One row per sow, with that sow's 10-minute window summaries stored in the
# list-column `data`. group_by() + nest() returns a tibble that is still
# grouped by Sow_ID; ungroup() removes that grouping so the object that is
# later saved to disk behaves like an ordinary tibble for whoever loads it.
nested_sensor <- df_window_summary %>%
  group_by(Sow_ID) %>%
  nest() %>%
  ungroup()
library(dplyr)

# Per-sow recording span:
#   first_obs / last_obs - earliest and latest 10-minute window for the sow
#   n_windows            - number of 10-minute windows with data
#   FD_Gestal            - first FD_Gestal value recorded for the sow
#   hr_before            - hours from first_obs to FD_Gestal
#   duration_hours       - hours between first_obs and last_obs
sow_time_summary <- df_window_summary %>%
  group_by(Sow_ID) %>%
  summarise(
    first_obs = min(window10, na.rm = TRUE),
    last_obs  = max(window10, na.rm = TRUE),
    n_windows = n(),
    FD_Gestal = first(FD_Gestal),
    # FD_Gestal here is already the per-sow scalar from the line above, so no
    # second first() is needed. It is a Date while first_obs is a date-time;
    # difftime() converts the Date to a date-time at the start of that day.
    hr_before = as.numeric(
      difftime(FD_Gestal, first_obs, units = "hours")
    ),
    duration_hours = as.numeric(
      difftime(last_obs, first_obs, units = "hours")
    ),
    .groups = "drop"
  )
sow_time_summary
# A tibble: 191 × 7
   Sow_ID first_obs           last_obs            n_windows FD_Gestal  hr_before
   <fct>  <dttm>              <dttm>                  <int> <date>         <dbl>
 1 34009  2025-03-30 00:00:00 2025-03-31 07:40:00       147 2025-04-02      72  
 2 28352  2025-04-02 07:00:00 2025-04-24 10:00:00      2552 2025-04-03      17  
 3 30071  2025-04-27 09:10:00 2025-04-27 09:40:00         4 2025-04-29      38.8
 4 28918  2025-04-23 09:50:00 2025-05-04 12:40:00      1458 2025-04-27      86.2
 5 33981  2025-03-30 12:20:00 2025-04-20 09:30:00      2487 2025-04-02      59.7
 6 26441  2025-04-23 09:50:00 2025-05-01 18:50:00       879 2025-04-27      86.2
 7 28380  2025-03-30 12:20:00 2025-04-07 11:30:00       975 2025-04-01      35.7
 8 31719  2025-04-23 09:50:00 2025-05-04 12:40:00      1455 2025-04-27      86.2
 9 34585  2025-03-30 00:00:00 2025-04-20 10:50:00      2890 2025-04-04     120  
10 29914  2025-04-23 09:50:00 2025-05-04 12:40:00      1459 2025-04-25      38.2
# ℹ 181 more rows
# ℹ 1 more variable: duration_hours <dbl>
# Sow IDs of the subset analysed separately below (32 IDs).
my32sows <- c(
  "34113", "31336", "31460", "28927", "34023", "28829", "31043", "29934",
  "25618", "28442", "24555", "30312", "28475", "28507", "29818", "28806",
  "31024", "30212", "34041", "31719", "33390", "31520", "34096", "29933",
  "29284", "26413", "30094", "29989", "28110", "31229", "33981", "31482"
)

# Filtering with %in% silently ignores IDs that have no sensor data, which
# makes the subset smaller than the list above without any indication.
# Report such IDs explicitly instead of dropping them unnoticed.
missing_sows <- my32sows[!my32sows %in% as.character(sow_time_summary$Sow_ID)]
if (length(missing_sows) > 0) {
  warning(
    "Sow IDs in my32sows with no rows in sow_time_summary: ",
    paste(missing_sows, collapse = ", "),
    call. = FALSE
  )
}

# Recording-span summary restricted to the selected sows. Sow_ID is a factor,
# so it is compared as character against the ID strings.
mysowlengthsum <- sow_time_summary %>%
  filter(as.character(Sow_ID) %in% my32sows)

# Persist the per-sow summary, the ID list and the nested window data.
save(
  sow_time_summary, my32sows, nested_sensor,
  file = "Z:/Isaac/Visual Features/1-5/Sensors/3_2.RData"
)

Summaries

# Summary of hr_before over every sow in sow_time_summary: count of sows,
# mean, standard error (plotrix::std.error), range, and threshold counts.
# NOTE(review): both 50-hour comparisons are strict, so a sow with hr_before
# exactly equal to 50 (or NA) is counted in neither of those two buckets.
sowsum <- summarise(
  sow_time_summary,
  n_sows             = n_distinct(Sow_ID),
  mean_hr_before     = mean(hr_before, na.rm = TRUE),
  se_hr_before       = std.error(hr_before, na.rm = TRUE),
  min_hr_before      = min(hr_before, na.rm = TRUE),
  max_hr_before      = max(hr_before, na.rm = TRUE),
  n_less_than_50     = sum(hr_before < 50, na.rm = TRUE),
  n_greater_than_50  = sum(hr_before > 50, na.rm = TRUE),
  n_greater_than_100 = sum(hr_before > 100, na.rm = TRUE)
)

# Reshape the single summary row into one (statistic, value) row per measure.
sowsumlong <- pivot_longer(
  sowsum,
  cols      = everything(),
  names_to  = "statistic",
  values_to = "value"
)
sowsumlong
# A tibble: 8 × 2
  statistic           value
  <chr>               <dbl>
1 n_sows             191   
2 mean_hr_before      67.9 
3 se_hr_before         2.05
4 min_hr_before       11.7 
5 max_hr_before      144   
6 n_less_than_50      44   
7 n_greater_than_50  143   
8 n_greater_than_100  17   

Summary for the subset of sows with video

# Same hr_before summary as for all sows, restricted to the IDs listed in
# my32sows. Sow_ID is a factor, so it is matched as character.
# NOTE(review): both 50-hour comparisons are strict, so a sow with hr_before
# exactly equal to 50 (or NA) is counted in neither of those two buckets.
mysowsum <- summarise(
  filter(sow_time_summary, as.character(Sow_ID) %in% my32sows),
  n_sows             = n_distinct(Sow_ID),
  mean_hr_before     = mean(hr_before, na.rm = TRUE),
  se_hr_before       = std.error(hr_before, na.rm = TRUE),
  min_hr_before      = min(hr_before, na.rm = TRUE),
  max_hr_before      = max(hr_before, na.rm = TRUE),
  n_less_than_50     = sum(hr_before < 50, na.rm = TRUE),
  n_greater_than_50  = sum(hr_before > 50, na.rm = TRUE),
  n_greater_than_100 = sum(hr_before > 100, na.rm = TRUE)
)

# Reshape the single summary row into one (statistic, value) row per measure.
mysowsumlong <- pivot_longer(
  mysowsum,
  cols      = everything(),
  names_to  = "statistic",
  values_to = "value"
)
mysowsumlong
# A tibble: 8 × 2
  statistic           value
  <chr>               <dbl>
1 n_sows              31   
2 mean_hr_before      76.8 
3 se_hr_before         4.20
4 min_hr_before       35.7 
5 max_hr_before      130.  
6 n_less_than_50       3   
7 n_greater_than_50   28   
8 n_greater_than_100   5