isotrees_1_16

options(warn = -1)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lmerTest)
Loading required package: lme4
Loading required package: Matrix

Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':

    lmer
The following object is masked from 'package:stats':

    step
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)


load("Z:/Isaac/Visual Features/1-5/step2.RData")

Features of interest

mean model: 10min Resid Width 20min Resid Width 30min Resid Width 60min Resid Width 10min Resid Rightmost.X 20min Resid Rightmost.X 30min Resid Rightmost.X 60min Resid Rightmost.X 10min mean_val Rightmost.Y 20min mean_val Rightmost.Y 30min mean_val Rightmost.Y 60min mean_val Rightmost.Y 10min mean_val Eccentricity 20min mean_val Eccentricity 30min mean_val Eccentricity 60min mean_val Eccentricity 10min mean_val Elongation 20min mean_val Elongation 30min mean_val Elongation 60min mean_val Elongation 10min mean_val Concavity 20min mean_val Concavity 30min mean_val Concavity 60min mean_val Concavity 10min mean_val Height 20min mean_val Height 30min mean_val Height 60min mean_val Height 10min mean_val Minor.Axis.Length 20min mean_val Minor.Axis.Length 30min mean_val Minor.Axis.Length 60min mean_val Minor.Axis.Length

var model: 10min Resid Centroid.X 20min Resid Centroid.X 30min Resid Centroid.X 60min Resid Centroid.X 10min Resid Height 20min Resid Height 30min Resid Height 60min Resid Height 10min Resid Major.Axis.Length 20min Resid Major.Axis.Length 30min Resid Major.Axis.Length 10min Resid Rightmost.X 20min Resid Rightmost.X 30min Resid Rightmost.X 10min Resid Width 20min Resid Width 30min Resid Width 10min mean_val Convex.Area 20min mean_val Convex.Area 30min mean_val Convex.Area 60min mean_val Convex.Area 10min mean_val Minor.Axis.Length 20min mean_val Minor.Axis.Length 30min mean_val Minor.Axis.Length 60min mean_val Minor.Axis.Length 10min mean_val Height 20min mean_val Height 30min mean_val Height 60min mean_val Height 10min mean_val Centroid.Y 20min mean_val Centroid.Y 30min mean_val Centroid.Y 60min mean_val Centroid.Y 10min mean_val Elongation 20min mean_val Elongation 30min mean_val Elongation 60min mean_val Elongation 10min mean_val Roundness 20min mean_val Roundness 30min mean_val Roundness 60min mean_val Roundness

10min Resid values

making a wide df for the resid values for 10min window

# Need to create a new wide df
library(dplyr)
library(tidyr)

features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- aug_res_10 %>%
  select(sow, ttf, feature, .resid)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = .resid,
    names_glue  = "{feature}_mean_resid"
  ) %>% 
  select(sow,ttf,Width_mean_resid,Rightmost.X_mean_resid)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- aug_res_10_var %>%
  select(sow, ttf, feature, .resid)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = .resid,
    names_glue  = "{feature}_var_resid"
  ) %>% 
  select(sow,ttf,Centroid.X_var_resid, Height_var_resid,Major.Axis.Length_var_resid,Rightmost.X_var_resid,Width_var_resid)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            

ISOTREES for -30 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-30)) %>% 
  ungroup()

colnames(baseline_df)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-30)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -30, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ISOTREES for -60 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-60)) %>% 
  ungroup()

colnames(baseline_df)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-60)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -60, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ISOTREES for -90 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-90)) %>% 
  ungroup()

colnames(baseline_df)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)
[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"            
newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-90)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -90, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

Results

We see here that here inbetween 30 and 60 hours before farrowing that there is a clear increase in anomoly score

We also notice that it is difficult to really get a good grasp on the sows that were not apart of most of the training data