sensors of video sows

Loading

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(forcats)
library(ggplot2)
library(purrr)
library(slider)
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(ggforce)
library(FKF)
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following object is masked from 'package:purrr':

    transpose
The following objects are masked from 'package:dplyr':

    between, first, last
library(readr)

# setwd("C:/Users/ipberg/OneDrive - Iowa State University/Spring26/Sensors/")
setwd("/Users/IsaacBerg/Library/CloudStorage/OneDrive-IowaStateUniversity/Spring26/Sensors/")
load("4-13.RData")
load("my32sows.RData")
farrow_events <- read_csv("Farrowing_data.csv") %>%
  rename(Sow_ID = `Sow ID`) %>%
  mutate(
    farrow_start_dt = mdy_hms(paste(`Farrowing Start date`, `Farrowing Start datetime`)),
    Sow_ID = as.character(Sow_ID)
  ) %>%
  select(Sow_ID, Cohort, farrow_start_dt)
Rows: 31 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (1): Farrowing Start date
dbl  (2): Sow ID, Cohort
time (1): Farrowing Start datetime

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

function for kalaman filter

# --- Kalman filter function ---
fit_kalman <- function(train_y, full_y) {
  
  a0 <- train_y[1]
  P0 <- matrix(1)
  dt <- ct <- matrix(0)
  Zt <- Tt <- matrix(1)
  
  fit <- optim(
    c(HHt = var(train_y, na.rm = TRUE) * .5,
      GGt = var(train_y, na.rm = TRUE) * .5),
    fn = function(par, ...)
      -fkf(HHt = matrix(0.000001), GGt = matrix(0.001), ...)$logLik,
    yt = rbind(train_y), a0 = a0, P0 = P0,
    dt = dt, ct = ct, Zt = Zt, Tt = Tt
  )
  
  fkf(
    a0 = a0, P0 = P0, dt = dt, ct = ct, Tt = Tt, Zt = Zt,
    HHt = matrix(0.000001),
    GGt = matrix(0.001),
    yt  = rbind(full_y)
  )$att[1, ] |> as.numeric()
}

95% CI

applying kalman filter

# --- apply to all sows ---
recresults95 <- sows_greater_60 %>%
  filter(Sow_ID %in% my32sows) %>% 
  unnest(data) %>%
  filter(sensor_ts >= (start_time + hours(12))) %>%
  group_by(Sow_ID) %>%
  nest() %>%
  left_join(train_lookup, by = "Sow_ID") %>%
  mutate(
    data = map2(train_y, data, function(train, df) {
      df %>% 
        mutate(
          # 1. Standard Kalman and CI calculations
          y_kalman = fit_kalman(train, m_var_24h),
          resid    = m_var_24h - y_kalman,
          se       = zoo::rollapply(resid, width = 96, FUN = sd, fill = NA, align = "right"),
          ci_lower = y_kalman - qt(0.975, df = 95) * se,
          ci_upper = y_kalman + qt(0.975, df = 95) * se,
          
          # 2. Define the breach flags
          above_upper = m_var_24h > ci_upper,
          below_lower = m_var_24h < ci_lower,
          
          # 3. Create the index for the FIRST spike
          # We use if_else to handle sows that never spike (returning a very large number)
          first_spike_idx = if(any(above_upper, na.rm = TRUE)) min(which(above_upper == TRUE)) else Inf,
          
          # 4. Now we can safely reference first_spike_idx
          is_post_spike = row_number() > first_spike_idx,
          crossed_lower_after_upper = is_post_spike & below_lower,
          
          # 5. Status Column
          violation_type = case_when(
            m_var_24h > ci_upper ~ 1,
            m_var_24h < ci_lower ~ -1,
            TRUE ~ 0
          )
        )
    })
  ) %>%
  select(-train_y)

plotting sows

recresults95 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  group_walk(~ {
    baseline_end <- min(.x$time_to_farrow, na.rm = TRUE) - 12
    p <- ggplot(.x, aes(x = time_to_farrow)) +
      geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "steelblue", alpha = 0.2) +
      geom_line(aes(y = m_var_24h), color = "black", alpha = 0.3) +
      geom_line(aes(y = y_kalman),  color = "blue", linewidth = 0.8) +
      geom_vline(xintercept = 0, linetype = "dashed") +
      geom_vline(xintercept = baseline_end)+
      expand_limits(y = 0) +
      theme_bw() +
      labs(
        title = paste("Sow", .y$Sow_ID),
        x     = "Hours relative to farrowing",
        y     = "24h variance of magnitude"
      )
    print(p)
  })

Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

creating table for results

# 1. Ensure the per-sow summary is fresh
recdt_perf_all95 <- recresults95 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  summarise(
    first_alarm  = min(time_to_farrow[above_upper == TRUE], na.rm = TRUE),
    second_alarm = min(time_to_farrow[crossed_lower_after_upper == TRUE], na.rm = TRUE),
    .groups = "drop"
  )
Warning: There were 10 warnings in `summarise()`.
The first warning was:
ℹ In argument: `first_alarm = min(time_to_farrow[above_upper == TRUE], na.rm =
  TRUE)`.
ℹ In group 4: `Sow_ID = 28442`.
Caused by warning in `min()`:
! no non-missing arguments to min; returning Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
# 2. Build the final results table with lead-time tiers
recquant_res_final95 <- data.table(
  # --- OVERALL COUNTS ---
  n_total_sows               = nrow(recdt_perf_all95),
  n_first_alarm              = sum(is.finite(recdt_perf_all95$first_alarm)),
  n_no_first_alarm           = sum(!is.finite(recdt_perf_all95$first_alarm)),
  n_second_alarm             = sum(is.finite(recdt_perf_all95$second_alarm)),
  n_no_second_alarm          = sum(!is.finite(recdt_perf_all95$second_alarm)),
  
  # --- MEANS ---
  mean_first_alarm           = mean(recdt_perf_all95$first_alarm[is.finite(recdt_perf_all95$first_alarm)], na.rm = TRUE),
  mean_second_alarm          = mean(recdt_perf_all95$second_alarm[is.finite(recdt_perf_all95$second_alarm)], na.rm = TRUE),

  # --- FIRST ALARM DETAILED BREAKDOWN (Sums to 78) ---
  n_first_alarm_gt_48h_before = sum(recdt_perf_all95$first_alarm < -48, na.rm = TRUE),
  n_first_alarm_24_to_48h_pre = sum(recdt_perf_all95$first_alarm < -24 & recdt_perf_all95$first_alarm >= -48, na.rm = TRUE),
  n_first_alarm_0_to_24h_pre  = sum(recdt_perf_all95$first_alarm < 0 & recdt_perf_all95$first_alarm >= -24, na.rm = TRUE),
  n_first_alarm_on_FD         = sum(recdt_perf_all95$first_alarm >= 0 & recdt_perf_all95$first_alarm <= 24, na.rm = TRUE),
  n_first_alarm_after_FD      = sum(recdt_perf_all95$first_alarm > 24 & is.finite(recdt_perf_all95$first_alarm), na.rm = TRUE),
  
  # --- SECOND ALARM BREAKDOWN (Sums to 51) ---
  n_second_alarm_before_FD    = sum(recdt_perf_all95$second_alarm < 0, na.rm = TRUE),
  n_second_alarm_on_FD        = sum(recdt_perf_all95$second_alarm >= 0 & recdt_perf_all95$second_alarm <= 24, na.rm = TRUE),
  n_second_alarm_after_FD     = sum(recdt_perf_all95$second_alarm > 24 & is.finite(recdt_perf_all95$second_alarm), na.rm = TRUE)
)

# Transpose for final presentation
print(t(recquant_res_final95))
                                 [,1]
n_total_sows                 17.00000
n_first_alarm                15.00000
n_no_first_alarm              2.00000
n_second_alarm                9.00000
n_no_second_alarm             8.00000
mean_first_alarm            -26.91667
mean_second_alarm             2.25000
n_first_alarm_gt_48h_before   3.00000
n_first_alarm_24_to_48h_pre   7.00000
n_first_alarm_0_to_24h_pre    3.00000
n_first_alarm_on_FD           1.00000
n_first_alarm_after_FD        1.00000
n_second_alarm_before_FD      4.00000
n_second_alarm_on_FD          3.00000
n_second_alarm_after_FD       2.00000

90% CI

applying kalman filter

# --- apply to all sows ---
recresults90 <- sows_greater_60 %>%
  filter(Sow_ID %in% my32sows) %>% 
  unnest(data) %>%
  filter(sensor_ts >= (start_time + hours(12))) %>%
  group_by(Sow_ID) %>%
  nest() %>%
  left_join(train_lookup, by = "Sow_ID") %>%
  mutate(
    data = map2(train_y, data, function(train, df) {
      df %>% 
        mutate(
          # 1. Standard Kalman and CI calculations
          y_kalman = fit_kalman(train, m_var_24h),
          resid    = m_var_24h - y_kalman,
          se       = zoo::rollapply(resid, width = 96, FUN = sd, fill = NA, align = "right"),
          ci_lower = y_kalman - qt(0.95, df = 95) * se,
          ci_upper = y_kalman + qt(0.95, df = 95) * se,
          
          # 2. Define the breach flags
          above_upper = m_var_24h > ci_upper,
          below_lower = m_var_24h < ci_lower,
          
          # 3. Create the index for the FIRST spike
          # We use if_else to handle sows that never spike (returning a very large number)
          first_spike_idx = if(any(above_upper, na.rm = TRUE)) min(which(above_upper == TRUE)) else Inf,
          
          # 4. Now we can safely reference first_spike_idx
          is_post_spike = row_number() > first_spike_idx,
          crossed_lower_after_upper = is_post_spike & below_lower,
          
          # 5. Status Column
          violation_type = case_when(
            m_var_24h > ci_upper ~ 1,
            m_var_24h < ci_lower ~ -1,
            TRUE ~ 0
          )
        )
    })
  ) %>%
  select(-train_y)

plotting sows

recresults90 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  group_walk(~ {
    baseline_end <- min(.x$time_to_farrow, na.rm = TRUE) - 12
    p <- ggplot(.x, aes(x = time_to_farrow)) +
      geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "steelblue", alpha = 0.2) +
      geom_line(aes(y = m_var_24h), color = "black", alpha = 0.3) +
      geom_line(aes(y = y_kalman),  color = "blue", linewidth = 0.8) +
      geom_vline(xintercept = 0, linetype = "dashed") +
      geom_vline(xintercept = baseline_end)+
      expand_limits(y = 0) +
      theme_bw() +
      labs(
        title = paste("Sow", .y$Sow_ID),
        x     = "Hours relative to farrowing",
        y     = "24h variance of magnitude"
      )
    print(p)
  })

Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

creating table for results

# 1. Ensure the per-sow summary is fresh
recdt_perf_all90 <- recresults90 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  summarise(
    first_alarm  = min(time_to_farrow[above_upper == TRUE], na.rm = TRUE),
    second_alarm = min(time_to_farrow[crossed_lower_after_upper == TRUE], na.rm = TRUE),
    .groups = "drop"
  )
Warning: There were 6 warnings in `summarise()`.
The first warning was:
ℹ In argument: `first_alarm = min(time_to_farrow[above_upper == TRUE], na.rm =
  TRUE)`.
ℹ In group 8: `Sow_ID = 31482`.
Caused by warning in `min()`:
! no non-missing arguments to min; returning Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
# 2. Build the final results table with lead-time tiers
recquant_res_final90 <- data.table(
  # --- OVERALL COUNTS ---
  n_total_sows               = nrow(recdt_perf_all90),
  n_first_alarm              = sum(is.finite(recdt_perf_all90$first_alarm)),
  n_no_first_alarm           = sum(!is.finite(recdt_perf_all90$first_alarm)),
  n_second_alarm             = sum(is.finite(recdt_perf_all90$second_alarm)),
  n_no_second_alarm          = sum(!is.finite(recdt_perf_all90$second_alarm)),
  
  # --- MEANS ---
  mean_first_alarm           = mean(recdt_perf_all90$first_alarm[is.finite(recdt_perf_all90$first_alarm)], na.rm = TRUE),
  mean_second_alarm          = mean(recdt_perf_all90$second_alarm[is.finite(recdt_perf_all90$second_alarm)], na.rm = TRUE),

  # --- FIRST ALARM DETAILED BREAKDOWN (Sums to 78) ---
  n_first_alarm_gt_48h_before = sum(recdt_perf_all90$first_alarm < -48, na.rm = TRUE),
  n_first_alarm_24_to_48h_pre = sum(recdt_perf_all90$first_alarm < -24 & recdt_perf_all90$first_alarm >= -48, na.rm = TRUE),
  n_first_alarm_0_to_24h_pre  = sum(recdt_perf_all90$first_alarm < 0 & recdt_perf_all90$first_alarm >= -24, na.rm = TRUE),
  n_first_alarm_on_FD         = sum(recdt_perf_all90$first_alarm >= 0 & recdt_perf_all90$first_alarm <= 24, na.rm = TRUE),
  n_first_alarm_after_FD      = sum(recdt_perf_all90$first_alarm > 24 & is.finite(recdt_perf_all90$first_alarm), na.rm = TRUE),
  
  # --- SECOND ALARM BREAKDOWN (Sums to 51) ---
  n_second_alarm_before_FD    = sum(recdt_perf_all90$second_alarm < 0, na.rm = TRUE),
  n_second_alarm_on_FD        = sum(recdt_perf_all90$second_alarm >= 0 & recdt_perf_all90$second_alarm <= 24, na.rm = TRUE),
  n_second_alarm_after_FD     = sum(recdt_perf_all90$second_alarm > 24 & is.finite(recdt_perf_all90$second_alarm), na.rm = TRUE)
)

# Transpose for final presentation
print(t(recquant_res_final90))
                                 [,1]
n_total_sows                 17.00000
n_first_alarm                16.00000
n_no_first_alarm              1.00000
n_second_alarm               12.00000
n_no_second_alarm             5.00000
mean_first_alarm            -27.98438
mean_second_alarm             1.56250
n_first_alarm_gt_48h_before   3.00000
n_first_alarm_24_to_48h_pre   8.00000
n_first_alarm_0_to_24h_pre    4.00000
n_first_alarm_on_FD           0.00000
n_first_alarm_after_FD        1.00000
n_second_alarm_before_FD      5.00000
n_second_alarm_on_FD          5.00000
n_second_alarm_after_FD       2.00000

90% CI

applying kalman filter

# --- apply to all sows ---
recresults68 <- sows_greater_60 %>%
  filter(Sow_ID %in% my32sows) %>% 
  unnest(data) %>%
  filter(sensor_ts >= (start_time + hours(12))) %>%
  group_by(Sow_ID) %>%
  nest() %>%
  left_join(train_lookup, by = "Sow_ID") %>%
  mutate(
    data = map2(train_y, data, function(train, df) {
      df %>% 
        mutate(
          # 1. Standard Kalman and CI calculations
          y_kalman = fit_kalman(train, m_var_24h),
          resid    = m_var_24h - y_kalman,
          se       = zoo::rollapply(resid, width = 96, FUN = sd, fill = NA, align = "right"),
          ci_lower = y_kalman - qt(0.84, df = 95) * se,
          ci_upper = y_kalman + qt(0.84, df = 95) * se,
          
          # 2. Define the breach flags
          above_upper = m_var_24h > ci_upper,
          below_lower = m_var_24h < ci_lower,
          
          # 3. Create the index for the FIRST spike
          # We use if_else to handle sows that never spike (returning a very large number)
          first_spike_idx = if(any(above_upper, na.rm = TRUE)) min(which(above_upper == TRUE)) else Inf,
          
          # 4. Now we can safely reference first_spike_idx
          is_post_spike = row_number() > first_spike_idx,
          crossed_lower_after_upper = is_post_spike & below_lower,
          
          # 5. Status Column
          violation_type = case_when(
            m_var_24h > ci_upper ~ 1,
            m_var_24h < ci_lower ~ -1,
            TRUE ~ 0
          )
        )
    })
  ) %>%
  select(-train_y)

plotting sows

recresults68 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  group_walk(~ {
    baseline_end <- min(.x$time_to_farrow, na.rm = TRUE) - 12
    p <- ggplot(.x, aes(x = time_to_farrow)) +
      geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "steelblue", alpha = 0.2) +
      geom_line(aes(y = m_var_24h), color = "black", alpha = 0.3) +
      geom_line(aes(y = y_kalman),  color = "blue", linewidth = 0.8) +
      geom_vline(xintercept = 0, linetype = "dashed") +
      geom_vline(xintercept = baseline_end)+
      expand_limits(y = 0) +
      theme_bw() +
      labs(
        title = paste("Sow", .y$Sow_ID),
        x     = "Hours relative to farrowing",
        y     = "24h variance of magnitude"
      )
    print(p)
  })

Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

creating table for results

# 1. Ensure the per-sow summary is fresh
recdt_perf_all68 <- recresults68 %>%
  unnest(data) %>%
  group_by(Sow_ID) %>%
  summarise(
    first_alarm  = min(time_to_farrow[above_upper == TRUE], na.rm = TRUE),
    second_alarm = min(time_to_farrow[crossed_lower_after_upper == TRUE], na.rm = TRUE),
    .groups = "drop"
  )
Warning: There were 3 warnings in `summarise()`.
The first warning was:
ℹ In argument: `first_alarm = min(time_to_farrow[above_upper == TRUE], na.rm =
  TRUE)`.
ℹ In group 8: `Sow_ID = 31482`.
Caused by warning in `min()`:
! no non-missing arguments to min; returning Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
# 2. Build the final results table with lead-time tiers
recquant_res_final68 <- data.table(
  # --- OVERALL COUNTS ---
  n_total_sows               = nrow(recdt_perf_all68),
  n_first_alarm              = sum(is.finite(recdt_perf_all68$first_alarm)),
  n_no_first_alarm           = sum(!is.finite(recdt_perf_all68$first_alarm)),
  n_second_alarm             = sum(is.finite(recdt_perf_all68$second_alarm)),
  n_no_second_alarm          = sum(!is.finite(recdt_perf_all68$second_alarm)),
  
  # --- MEANS ---
  mean_first_alarm           = mean(recdt_perf_all68$first_alarm[is.finite(recdt_perf_all68$first_alarm)], na.rm = TRUE),
  mean_second_alarm          = mean(recdt_perf_all68$second_alarm[is.finite(recdt_perf_all68$second_alarm)], na.rm = TRUE),

  # --- FIRST ALARM DETAILED BREAKDOWN (Sums to 78) ---
  n_first_alarm_gt_48h_before = sum(recdt_perf_all68$first_alarm < -48, na.rm = TRUE),
  n_first_alarm_24_to_48h_pre = sum(recdt_perf_all68$first_alarm < -24 & recdt_perf_all68$first_alarm >= -48, na.rm = TRUE),
  n_first_alarm_0_to_24h_pre  = sum(recdt_perf_all68$first_alarm < 0 & recdt_perf_all68$first_alarm >= -24, na.rm = TRUE),
  n_first_alarm_on_FD         = sum(recdt_perf_all68$first_alarm >= 0 & recdt_perf_all68$first_alarm <= 24, na.rm = TRUE),
  n_first_alarm_after_FD      = sum(recdt_perf_all68$first_alarm > 24 & is.finite(recdt_perf_all68$first_alarm), na.rm = TRUE),
  
  # --- SECOND ALARM BREAKDOWN (Sums to 51) ---
  n_second_alarm_before_FD    = sum(recdt_perf_all68$second_alarm < 0, na.rm = TRUE),
  n_second_alarm_on_FD        = sum(recdt_perf_all68$second_alarm >= 0 & recdt_perf_all68$second_alarm <= 24, na.rm = TRUE),
  n_second_alarm_after_FD     = sum(recdt_perf_all68$second_alarm > 24 & is.finite(recdt_perf_all68$second_alarm), na.rm = TRUE)
)

# Transpose for final presentation
print(t(recquant_res_final68))
                                  [,1]
n_total_sows                 17.000000
n_first_alarm                16.000000
n_no_first_alarm              1.000000
n_second_alarm               15.000000
n_no_second_alarm             2.000000
mean_first_alarm            -37.093750
mean_second_alarm            -2.916667
n_first_alarm_gt_48h_before   6.000000
n_first_alarm_24_to_48h_pre   7.000000
n_first_alarm_0_to_24h_pre    2.000000
n_first_alarm_on_FD           0.000000
n_first_alarm_after_FD        1.000000
n_second_alarm_before_FD      7.000000
n_second_alarm_on_FD          8.000000
n_second_alarm_after_FD       0.000000

Results - comparing different CI values

# 1. Transpose each table and convert to a data frame-like structure
# We use t() to flip them and as.data.frame to keep the row names
recres_68 <- as.data.frame(t(recquant_res_final68))
recres_90 <- as.data.frame(t(recquant_res_final90))
recres_95 <- as.data.frame(t(recquant_res_final95))

# 2. Combine them into one table
comparison_table <- cbind(recres_68, recres_90, recres_95)

# 3. Rename the columns for clarity
colnames(comparison_table) <- c("CI_68", "CI_90", "CI_95")

# 4. View the result
print(comparison_table)
                                 CI_68     CI_90     CI_95
n_total_sows                 17.000000  17.00000  17.00000
n_first_alarm                16.000000  16.00000  15.00000
n_no_first_alarm              1.000000   1.00000   2.00000
n_second_alarm               15.000000  12.00000   9.00000
n_no_second_alarm             2.000000   5.00000   8.00000
mean_first_alarm            -37.093750 -27.98438 -26.91667
mean_second_alarm            -2.916667   1.56250   2.25000
n_first_alarm_gt_48h_before   6.000000   3.00000   3.00000
n_first_alarm_24_to_48h_pre   7.000000   8.00000   7.00000
n_first_alarm_0_to_24h_pre    2.000000   4.00000   3.00000
n_first_alarm_on_FD           0.000000   0.00000   1.00000
n_first_alarm_after_FD        1.000000   1.00000   1.00000
n_second_alarm_before_FD      7.000000   5.00000   4.00000
n_second_alarm_on_FD          8.000000   5.00000   3.00000
n_second_alarm_after_FD       0.000000   2.00000   2.00000