isotree_raw

options(warn = -1)

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lmerTest)

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelr)
library(purrr)
library(emmeans)

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)


load("Z:/Isaac/Visual Features/1-5/step1.RData")

# Need to create a new wide df


features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- df_10min_raw %>%
  select(sow, ttf, feature, mean_value)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = mean_value,
    names_glue  = "{feature}_mean"
  ) %>% 
  select(sow,ttf,Width_mean,Rightmost.X_mean)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- df_10min_raw %>%
  select(sow, ttf, feature, var_value)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = var_value,
    names_glue  = "{feature}_var"
  ) %>% 
  select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))

[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"

# sow12 <- df_wide_10 %>% 
#   filter(sow=="18")


baseline <- df_wide_10 %>% 
  group_by(sow) %>%
  filter(any(ttf < -50)) %>%
  ungroup() %>% 
  filter(between(ttf,-120,-50)) %>% 
  ungroup()

model <- isolation.forest(
  data = baseline %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10 %>% 
  group_by(sow) %>%
  filter(any(ttf < -50)) %>%
  ungroup() 

newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-50)

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

nested_data <- newdata%>%
  nest(data = -sow)
nested_data

# A tibble: 9 × 2
  sow   data                
  <fct> <list>              
1 4     <tibble [1,076 × 9]>
2 6     <tibble [1,076 × 9]>
3 8     <tibble [1,074 × 9]>
4 10    <tibble [1,085 × 9]>
5 12    <tibble [1,074 × 9]>
6 18    <tibble [1,076 × 9]>
7 22    <tibble [1,082 × 9]>
8 24    <tibble [1,083 × 9]>
9 26    <tibble [1,081 × 9]>

cuves <- nested_data %>%
  mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
                         fitted = purrr::map(loess_fit, `[[`, "fitted")
                         )
 
results <- cuves %>%
  dplyr::select(-loess_fit) %>% 
  tidyr::unnest(cols = c(data, fitted))

ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
  geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y")+
   theme(legend.position = "none")

find_max<-function(x,y){
  min_pos<-x[which.max(y)]
  return(min_pos)
}
 
mns_trained_on_all<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns_trained_on_all

# A tibble: 9 × 2
  sow   mx_loc
  <fct>  <dbl>
1 4     -22.1 
2 6     -14.7 
3 8     -17.2 
4 10    -16.0 
5 12    -27.0 
6 18     -2.82
7 22    -23.9 
8 24    -17.8 
9 26    -13.1

save(mns_trained_on_all,file="Z:/Isaac/Visual Features/1-5/peak table/trained_on_all.RData")

ISOTREES for -50 and before as training

baseline_df <- df_wide_10 %>%
  filter(between(ttf,-116.31051,-50)) %>% 
  ungroup()

colnames(baseline_df)

[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"

model <- isolation.forest(
  data = baseline_df %>% select(-sow,-ttf),
  ntrees = 500
)

newdata <- df_wide_10

colnames(newdata)

[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"

newdata$anomaly_score <- predict(
  model,
  newdata = newdata %>% select(-sow,-ttf)
)

ggplot(newdata,aes(x=ttf,y=anomaly_score))+
  geom_point()+geom_smooth()+geom_vline(xintercept=-50)

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(newdata, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

newdata_trained <- newdata %>%
  group_by(sow) %>%
  filter(any(ttf < -50)) %>%
  ungroup()

newdata_untrained <- newdata %>%
  group_by(sow) %>%
  filter(!any(ttf < -50)) %>%
  ungroup()

ggplot(newdata_trained, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

ggplot(newdata_untrained, aes(x = ttf, y = anomaly_score,color=sow)) +
  # geom_point(size = 1, alpha = 0.5) +
  geom_smooth(method = "loess" , se = FALSE) +
  #geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  #facet_wrap(~ sow, scales = "free_y") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

newdata_trained <- newdata_trained %>%
  mutate(group = "Previously Seen Sows")

newdata_untrained <- newdata_untrained %>%
  mutate(group = "Previously Unseen Sows")

combined <- bind_rows(newdata_trained, newdata_untrained)

colnames(combined)

 [1] "sow"                   "ttf"                   "Width_mean"           
 [4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
 [7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"            
[10] "anomaly_score"         "group"

ggplot(combined, aes(x = ttf, y = anomaly_score, color = sow)) +
  geom_smooth(method = "loess", se = FALSE) +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  facet_wrap(~ group, scales = "fixed")

`geom_smooth()` using formula = 'y ~ x'

nested_data <- newdata_trained%>%
  nest(data = -sow)
nested_data

# A tibble: 9 × 2
  sow   data                 
  <fct> <list>               
1 4     <tibble [1,076 × 10]>
2 6     <tibble [1,076 × 10]>
3 8     <tibble [1,074 × 10]>
4 10    <tibble [1,085 × 10]>
5 12    <tibble [1,074 × 10]>
6 18    <tibble [1,076 × 10]>
7 22    <tibble [1,082 × 10]>
8 24    <tibble [1,083 × 10]>
9 26    <tibble [1,081 × 10]>

cuves <- nested_data %>%
  mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
                         fitted = purrr::map(loess_fit, `[[`, "fitted")
                         )
 
results <- cuves %>%
  dplyr::select(-loess_fit) %>% 
  tidyr::unnest(cols = c(data, fitted))
 
ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
  geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  facet_wrap(~ sow, scales = "free_y")+
   theme(legend.position = "none")

find_max<-function(x,y){
  min_pos<-x[which.max(y)]
  return(min_pos)
}
 
mns<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns

# A tibble: 9 × 2
  sow   mx_loc
  <fct>  <dbl>
1 4     -22.1 
2 6     -14.7 
3 8     -17.2 
4 10    -16.0 
5 12    -27.0 
6 18     -2.82
7 22    -23.9 
8 24    -17.8 
9 26    -13.1

nested_data <- newdata_untrained%>%
  nest(data = -sow)
nested_data

# A tibble: 9 × 2
  sow   data               
  <fct> <list>             
1 1     <tibble [296 × 10]>
2 3     <tibble [295 × 10]>
3 5     <tibble [297 × 10]>
4 9     <tibble [304 × 10]>
5 11    <tibble [295 × 10]>
6 13    <tibble [303 × 10]>
7 19    <tibble [300 × 10]>
8 23    <tibble [297 × 10]>
9 29    <tibble [441 × 10]>

cuves <- nested_data %>%
  mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
                         fitted = purrr::map(loess_fit, `[[`, "fitted")
                         )
 
results <- cuves %>%
  dplyr::select(-loess_fit) %>% 
  tidyr::unnest(cols = c(data, fitted))
 
ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
  geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
  geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  facet_wrap(~ sow, scales = "free_y")+
   theme(legend.position = "none")

find_max<-function(x,y){
  min_pos<-x[which.max(y)]
  return(min_pos)
}
 
mns<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns

# A tibble: 9 × 2
  sow    mx_loc
  <fct>   <dbl>
1 1     -18.2  
2 3      -5.35 
3 5       0.623
4 9     -35.9  
5 11     35.6  
6 13    -10.5  
7 19     -1.72 
8 23    -27.9  
9 29     -6.97