isotree

# Silence warnings for the rest of the session: with a negative `warn`
# value R ignores all warnings.
# NOTE(review): this also hides warnings raised by the model fitting and
# plotting code further down - confirm that is intended.
options(warn = -1)

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lmerTest)

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelr)
library(purrr)
library(emmeans)

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)


# Load previously saved objects. aug_res_10_var is not created anywhere in
# this script, so it is presumably restored from this file - TODO confirm.
load("Z:/Isaac/Visual Features/1-5/step2.RData")

# Baseline window: rows with ttf in [-90, -60] (both ends inclusive), reduced
# to the feature name, the sow and the residual.
baseline_df <- aug_res_10_var %>%
  group_by(sow) %>%
  filter(ttf >= -90, ttf <= -60) %>%
  ungroup() %>%
  select(feature, sow, .resid)

# baselinecol <- baseline_df[baseline_df$feature == "Rightmost.X", ]
# model <- isolation.forest(
#   data = data.frame(baselinecol$.resid), ntrees = 200)
# 
# newdata <- aug_res_10_var %>%
#   filter(feature == "Rightmost.X")
# newdata$anomaly_score <- predict(
#   model,
#   newdata = data.frame(resid = newdata$.resid)
# )
# 
# ggplot(newdata,aes(x=ttf,y=anomaly_score))+
#   geom_point()+geom_smooth()

# Baseline-window rows of the single feature under study.
baselinecol <- dplyr::filter(baseline_df, feature == "Rightmost.X")

# Isolation forest fitted on a one-column frame holding the residuals.
model <- isolation.forest(
  data.frame(resid = baselinecol[[".resid"]]),
  ntrees = 200
)

# Every row of this feature (any ttf), scored with the baseline model on the
# same single `resid` column.
newdata <- dplyr::filter(aug_res_10_var, feature == "Rightmost.X")
newdata[["anomaly_score"]] <- predict(
  model,
  newdata = data.frame(resid = newdata[[".resid"]])
)

# Anomaly score against ttf, with a smoother drawn over the points.
ggplot(data = newdata, mapping = aes(x = ttf, y = anomaly_score)) +
  geom_point() +
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Features whose residuals are screened for anomalies.
features_to_check <- c(
  "Rightmost.X", "Centroid.X", "Major.Axis.Length", "Minor.Axis.Length"
)

# One numeric score per row; rows of features not listed above stay NA.
aug_res_10_var$anomaly_score <- NA_real_

for (feature_name in features_to_check) {
  # Baseline rows of the current feature (%in% treats a missing feature name
  # as "no match" instead of producing NA rows).
  baselinecol <- baseline_df[baseline_df$feature %in% feature_name, ]

  if (nrow(baselinecol) == 0L) {
    message("No baseline rows for feature '", feature_name, "'; skipping.")
    next
  }

  # Fit on the residual column only. Passing the whole baseline frame would
  # also hand the identifier columns (feature, sow) to the forest, so the
  # score would depend on which sow a row belongs to rather than on the
  # residual alone.
  model <- isolation.forest(
    data = data.frame(resid = baselinecol$.resid),
    ntrees = 200
  )

  # Score every row of this feature on the same single `resid` column.
  feature_rows <- aug_res_10_var$feature %in% feature_name
  newdata <- aug_res_10_var[feature_rows, ]
  scores <- predict(model, newdata = data.frame(resid = newdata$.resid))

  # Store the scores in the full table and in the plotting subset.
  aug_res_10_var$anomaly_score[feature_rows] <- scores
  newdata$anomaly_score <- scores

  # Score against ttf; the y aesthetic reads from the data instead of a
  # loop-level variable.
  p <- ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
    geom_point() +
    geom_smooth() +
    ggtitle(paste("Anomaly Scores for", feature_name)) +
    theme_minimal()

  # Plots built inside a loop are only drawn when printed explicitly.
  print(p)
}

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Wider baseline window: all columns, rows with ttf in [-100, -50]
# (both ends inclusive).
baseline_df <- aug_res_10_var %>%
  group_by(sow) %>%
  filter(ttf >= -100, ttf <= -50) %>%
  ungroup()

# Features scored by the loop that follows.
features_to_check <- c(
  "Rightmost.X",
  "Centroid.X",
  "Major.Axis.Length",
  "Minor.Axis.Length"
)

# One numeric score per row; rows of features not in features_to_check stay NA.
aug_res_10_var$anomaly_score <- NA_real_

for (feature_name in features_to_check) {
  # Baseline rows of the current feature (%in% treats a missing feature name
  # as "no match" instead of producing NA rows).
  baselinecol <- baseline_df[baseline_df$feature %in% feature_name, ]

  if (nrow(baselinecol) == 0L) {
    message("No baseline rows for feature '", feature_name, "'; skipping.")
    next
  }

  # Fit on the residual column only. baseline_df carries every column of
  # aug_res_10_var, so passing it whole would also train the forest on ttf
  # (restricted to the baseline window), sow and any earlier anomaly_score;
  # rows outside the window would then score as anomalous because of their
  # ttf, not because of their residual.
  model <- isolation.forest(
    data = data.frame(resid = baselinecol$.resid),
    ntrees = 200
  )

  # Score every row of this feature on the same single `resid` column.
  feature_rows <- aug_res_10_var$feature %in% feature_name
  newdata <- aug_res_10_var[feature_rows, ]
  scores <- predict(model, newdata = data.frame(resid = newdata$.resid))

  # Store the scores in the full table and in the plotting subset.
  aug_res_10_var$anomaly_score[feature_rows] <- scores
  newdata$anomaly_score <- scores

  # Score against ttf; the y aesthetic reads from the data instead of a
  # loop-level variable.
  p <- ggplot(newdata, aes(x = ttf, y = anomaly_score)) +
    geom_point() +
    geom_smooth() +
    ggtitle(paste("Anomaly Scores for", feature_name)) +
    theme_minimal()

  # Plots built inside a loop are only drawn when printed explicitly.
  print(p)
}

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Baseline window: all columns, rows with ttf in [-90, -60]
# (both ends inclusive).
baseline_df <- aug_res_10_var %>%
  group_by(sow) %>%
  filter(ttf >= -90, ttf <= -60) %>%
  ungroup()

# Features scored by the loop that follows.
features_to_check <- c(
  "Rightmost.Y",
  "Eccentricity",
  "Elongation",
  "Concavity",
  "Height",
  "Minor.Axis.Length"
)

library(dplyr)
library(ggplot2)
library(isotree)



# Start from an all-missing numeric score column; rows of features that are
# not in features_to_check keep NA.
aug_res_10_var$anomaly_score <- rep(NA_real_, nrow(aug_res_10_var))

for (feature_name in features_to_check) {

  # Baseline-window rows of the current feature; only their residuals are
  # used for fitting.
  baselinecol <- dplyr::filter(baseline_df, feature == feature_name)

  # sample_size is set to the number of baseline rows, i.e. the sub-sample
  # used per tree is as large as the whole baseline sample.
  model <- isolation.forest(
    data.frame(resid = baselinecol[[".resid"]]),
    sample_size = nrow(baselinecol),
    ntrees = 200
  )

  # All rows of this feature, scored on the same single `resid` column.
  newdata <- dplyr::filter(aug_res_10_var, feature == feature_name)
  scores <- predict(
    model,
    newdata = data.frame(resid = newdata[[".resid"]])
  )

  # Write the scores into the matching rows of the full table.
  aug_res_10_var[["anomaly_score"]][
    aug_res_10_var[["feature"]] == feature_name
  ] <- scores

  # Score against ttf: semi-transparent points plus a smoother without a
  # confidence band.
  p <- ggplot(newdata, aes(x = ttf, y = scores)) +
    theme_minimal() +
    ggtitle(paste("Anomaly Scores for", feature_name)) +
    geom_point(alpha = 0.5) +
    geom_smooth(se = FALSE)

  # Plots built inside a loop are only drawn when printed explicitly.
  print(p)
}

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Wider baseline window: all columns, rows with ttf in [-100, -50]
# (both ends inclusive).
baseline_df <- aug_res_10_var %>%
  group_by(sow) %>%
  filter(ttf >= -100, ttf <= -50) %>%
  ungroup()

# Features scored by the loop that follows.
features_to_check <- c(
  "Rightmost.Y",
  "Eccentricity",
  "Elongation",
  "Concavity",
  "Height",
  "Minor.Axis.Length"
)

library(dplyr)
library(ggplot2)
library(isotree)



# Start from an all-missing numeric score column; rows of features that are
# not in features_to_check keep NA.
aug_res_10_var$anomaly_score <- rep(NA_real_, nrow(aug_res_10_var))

for (feature_name in features_to_check) {

  # Baseline-window rows of the current feature; only their residuals are
  # used for fitting.
  baselinecol <- dplyr::filter(baseline_df, feature == feature_name)

  # sample_size is set to the number of baseline rows, i.e. the sub-sample
  # used per tree is as large as the whole baseline sample.
  model <- isolation.forest(
    data.frame(resid = baselinecol[[".resid"]]),
    sample_size = nrow(baselinecol),
    ntrees = 200
  )

  # All rows of this feature, scored on the same single `resid` column.
  newdata <- dplyr::filter(aug_res_10_var, feature == feature_name)
  scores <- predict(
    model,
    newdata = data.frame(resid = newdata[[".resid"]])
  )

  # Write the scores into the matching rows of the full table.
  aug_res_10_var[["anomaly_score"]][
    aug_res_10_var[["feature"]] == feature_name
  ] <- scores

  # Score against ttf: semi-transparent points plus a smoother without a
  # confidence band.
  p <- ggplot(newdata, aes(x = ttf, y = scores)) +
    theme_minimal() +
    ggtitle(paste("Anomaly Scores for", feature_name)) +
    geom_point(alpha = 0.5) +
    geom_smooth(se = FALSE)

  # Plots built inside a loop are only drawn when printed explicitly.
  print(p)
}

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'