isotrees_1_16

options(warn = -1)

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lmerTest)

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelr)
library(purrr)
library(emmeans)

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)


load("Z:/Isaac/Visual Features/1-5/step2.RData")

Features of interest

mean model: 10min Resid Width 20min Resid Width 30min Resid Width 60min Resid Width 10min Resid Rightmost.X 20min Resid Rightmost.X 30min Resid Rightmost.X 60min Resid Rightmost.X 10min mean_val Rightmost.Y 20min mean_val Rightmost.Y 30min mean_val Rightmost.Y 60min mean_val Rightmost.Y 10min mean_val Eccentricity 20min mean_val Eccentricity 30min mean_val Eccentricity 60min mean_val Eccentricity 10min mean_val Elongation 20min mean_val Elongation 30min mean_val Elongation 60min mean_val Elongation 10min mean_val Concavity 20min mean_val Concavity 30min mean_val Concavity 60min mean_val Concavity 10min mean_val Height 20min mean_val Height 30min mean_val Height 60min mean_val Height 10min mean_val Minor.Axis.Length 20min mean_val Minor.Axis.Length 30min mean_val Minor.Axis.Length 60min mean_val Minor.Axis.Length

var model: 10min Resid Centroid.X 20min Resid Centroid.X 30min Resid Centroid.X 60min Resid Centroid.X 10min Resid Height 20min Resid Height 30min Resid Height 60min Resid Height 10min Resid Major.Axis.Length 20min Resid Major.Axis.Length 30min Resid Major.Axis.Length 10min Resid Rightmost.X 20min Resid Rightmost.X 30min Resid Rightmost.X 10min Resid Width 20min Resid Width 30min Resid Width 10min mean_val Convex.Area 20min mean_val Convex.Area 30min mean_val Convex.Area 60min mean_val Convex.Area 10min mean_val Minor.Axis.Length 20min mean_val Minor.Axis.Length 30min mean_val Minor.Axis.Length 60min mean_val Minor.Axis.Length 10min mean_val Height 20min mean_val Height 30min mean_val Height 60min mean_val Height 10min mean_val Centroid.Y 20min mean_val Centroid.Y 30min mean_val Centroid.Y 60min mean_val Centroid.Y 10min mean_val Elongation 20min mean_val Elongation 30min mean_val Elongation 60min mean_val Elongation 10min mean_val Roundness 20min mean_val Roundness 30min mean_val Roundness 60min mean_val Roundness

10min Resid values

making a wide df for the resid values for 10min window

# Need to create a new wide df
library(dplyr)
library(tidyr)

features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- aug_res_10 %>%
  select(sow, ttf, feature, .resid)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = .resid,
    names_glue  = "{feature}_mean_resid"
  ) %>% 
  select(sow,ttf,Width_mean_resid,Rightmost.X_mean_resid)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- aug_res_10_var %>%
  select(sow, ttf, feature, .resid)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = .resid,
    names_glue  = "{feature}_var_resid"
  ) %>% 
  select(sow,ttf,Centroid.X_var_resid, Height_var_resid,Major.Axis.Length_var_resid,Rightmost.X_var_resid,Width_var_resid)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

ISOTREES for -30 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-30)) %>% 
  ungroup()

colnames(baseline_df)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-30)

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -30, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

ISOTREES for -60 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-60)) %>% 
  ungroup()

colnames(baseline_df)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-60)

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -60, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

ISOTREES for -90 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-90)) %>% 
  ungroup()

colnames(baseline_df)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)

[1] "sow"                         "ttf"                        
[3] "Width_mean_resid"            "Rightmost.X_mean_resid"     
[5] "Centroid.X_var_resid"        "Height_var_resid"           
[7] "Major.Axis.Length_var_resid" "Rightmost.X_var_resid"      
[9] "Width_var_resid"

newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-90)

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  geom_vline(xintercept = -90, linetype = "dashed", color = "red") +
  facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'