isotree_raw

options(warn = -1)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lmerTest)
Loading required package: lme4
Loading required package: Matrix

Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':

    lmer
The following object is masked from 'package:stats':

    step
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)


load("Z:/Isaac/Visual Features/1-5/step1.RData")
# Need to create a new wide df


features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- df_10min_raw %>%
  select(sow, ttf, feature, mean_value)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = mean_value,
    names_glue  = "{feature}_mean"
  ) %>% 
  select(sow,ttf,Width_mean,Rightmost.X_mean)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- df_10min_raw %>%
  select(sow, ttf, feature, var_value)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = var_value,
    names_glue  = "{feature}_var"
  ) %>% 
  select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))
[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            

ISOTREES for -50 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-50)) %>% 
  ungroup()

colnames(baseline_df)
[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            
model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)
[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            
newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-50)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

newdata_trained <- newdata %>%
  group_by(sow) %>%
  filter(any(ttf < -50)) %>%
  ungroup()

newdata_untrained <- newdata %>%
  group_by(sow) %>%
  filter(!any(ttf < -50)) %>%
  ungroup()
ggplot(newdata_trained, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(newdata_untrained, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  #geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

newdata_trained <- newdata_trained %>%
  mutate(group = "trained")

newdata_untrained <- newdata_untrained %>%
  mutate(group = "untrained")

combined <- bind_rows(newdata_trained, newdata_untrained)

colnames(combined)
 [1] "sow"                   "ttf"                   "Width_mean"           
 [4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
 [7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            
[10] "anomaly_score"         "group"                
ggplot(combined, aes(x = ttf, y = anomaly_score, color = sow)) +
  geom_smooth(method = "loess", se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  facet_wrap(~ group, scales = "fixed") +   # <-- same y scale
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'