Untitled

Removing excess warnings from plots

options(warn = -1)

Loading Libraries

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lmerTest)
Loading required package: lme4
Loading required package: Matrix

Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':

    lmer
The following object is masked from 'package:stats':

    step
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)
library(plotrix)

Loading in Data

load("Z:/Isaac/Visual Features/1-5/step1.RData")

creating a wide df for all sows for selected features

# Need to create a new wide df


features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- df_10min_raw %>%
  select(sow, ttf, feature, mean_value)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = mean_value,
    names_glue  = "{feature}_mean"
  ) %>% 
  select(sow,ttf,Width_mean,Rightmost.X_mean)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- df_10min_raw %>%
  select(sow, ttf, feature, var_value)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = var_value,
    names_glue  = "{feature}_var"
  ) %>% 
  select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))
[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            

defining function to get ttf value at peak

find_max<-function(x,y){
  min_pos<-x[which.max(y)]
  return(min_pos)
}

Nesting df per sow

keeps sows with greater than 50 hours of recording before farrowing

nested_df_of_selected <- df_wide_10 %>% 
  group_by(sow) %>% 
  filter(any(ttf < -50)) %>% 
  ungroup()

Nests by sow and has a full tibble (for testing) and a baseline tibble (for training)

nested_df_of_selected <- nested_df_of_selected %>% 
  nest(full = -sow) %>% 
  mutate(
    baseline=map(full,~.x %>% filter(between(ttf, -120,-50)))
  )

creates a new column for a model for each sow

nested_df_of_selected <- nested_df_of_selected %>% 
  mutate(
    iso_model = map(baseline, ~isolation.forest(data=.x %>% select(-ttf),ntrees = 500))
  )

Scores data based on model trained on baseline

nested_df_of_selected <- nested_df_of_selected %>% 
  mutate(
    full_scored = map2(
      full,
      iso_model,
      ~ .x %>% 
        mutate(
          anomaly_score = predict(
            .y,
            newdata = .x %>% select(-ttf)
          )
        )
    )
  )

applies loess function to get fitted values on anomaly scores

nested_df_of_selected <- nested_df_of_selected %>% 
  mutate(
    full_loess = map(
      full_scored,
      ~ {
        lo <- loess(anomaly_score ~ ttf, data = .x, span = 0.75)
        .x %>% mutate(fitted = lo$fitted)
      }
    )
  )

obtaining peak location

nested_df_of_selected <- nested_df_of_selected %>% 
  mutate(
    summary_stats = map(
      full_loess,
      ~ .x %>% 
        summarise(
          peak_loc = ttf[which.max(fitted)],
          peak_value = max(fitted, na.rm=TRUE)
        )
    )
  )

creating a table for the peaks and the values of the peaks from the fitted values

ind_peak <- nested_df_of_selected %>% 
  unnest(summary_stats) %>% 
  select(sow,peak_loc,peak_value)

obtaining average ttf at peak and peak value with standard error

ind_peak_summary <- tibble(
  Metric = c("Peak TTF", "Peak Value"),
  Mean   = c(
    mean(ind_peak$peak_loc, na.rm = TRUE),
    mean(ind_peak$peak_value, na.rm = TRUE)
  ),
  SE     = c(
    std.error(ind_peak$peak_loc),
    std.error(ind_peak$peak_value)
  )
)

ind_peak_summary
# A tibble: 2 × 3
  Metric        Mean      SE
  <chr>        <dbl>   <dbl>
1 Peak TTF   -15.1   2.61   
2 Peak Value   0.488 0.00458

plotting per sow

naming averages to use in plots

avg_peak_ttf <- ind_peak_summary %>%
  filter(Metric == "Peak TTF") %>%
  pull(Mean)

avg_peak_val <- ind_peak_summary %>%
  filter(Metric == "Peak Value") %>%
  pull(Mean)

Adding peak data into nested tibble

nested_df_of_selected <- nested_df_of_selected %>%
  unnest(summary_stats)

creating plots per sow

nested_df_of_selected <- nested_df_of_selected %>%
  mutate(
    plot = pmap(
      list(full_loess, peak_loc, sow),
      ~ ggplot(..1, aes(ttf, fitted)) +
          geom_line(aes(color="Fitted Curve"),size = 1) +
          geom_vline(aes(xintercept = -50, color = "-50 TTF"),
                   linetype = "dashed", linewidth = 1) +
          geom_vline(aes(xintercept = ..2, color = "Individual Peak"),
                   linewidth = 1) +
          geom_vline(aes(xintercept = avg_peak_ttf,
                       color = "Average Peak TTF"),
                   linewidth = 1) +
          geom_hline(aes(yintercept = avg_peak_val,
                       color = "Average Peak Value"),
                   linewidth = 1) +
          labs(
            title = paste0(
              "Sow ", ..3,
              " | Peak TTF: ", round(..2, 1)
            ),
            x = "Time to Farrowing (ttf)",
            y = "LOESS Fitted Anomaly Score"
          ) +
          theme_minimal()
    )
  )

printing all plots

walk(nested_df_of_selected$plot, print)