options(warn = -1)isotree_raw
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lmerTest)Loading required package: lme4
Loading required package: Matrix
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)
load("Z:/Isaac/Visual Features/1-5/step1.RData")# Need to create a new wide df
features_to_use_mean <- c("Rightmost.X", "Width")
df_subset_mean <- df_10min_raw %>%
select(sow, ttf, feature, mean_value)
df_wide_mean <- df_subset_mean %>%
filter(feature %in% features_to_use_mean) %>%
pivot_wider(
names_from = feature,
values_from = mean_value,
names_glue = "{feature}_mean"
) %>%
select(sow,ttf,Width_mean,Rightmost.X_mean)
features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")
df_subset_var <- df_10min_raw %>%
select(sow, ttf, feature, var_value)
df_wide_var <- df_subset_var %>%
filter(feature %in% features_to_use_var) %>%
pivot_wider(
names_from = feature,
values_from = var_value,
names_glue = "{feature}_var"
) %>%
select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)
df_wide_10 <- df_wide_mean %>%
left_join(df_wide_var,by=c("sow","ttf"))
colnames((df_wide_10))[1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
# sow12 <- df_wide_10 %>%
# filter(sow=="18")
baseline <- df_wide_10 %>%
group_by(sow) %>%
filter(any(ttf < -50)) %>%
ungroup() %>%
filter(between(ttf,-120,-50)) %>%
ungroup()
model <- isolation.forest(
data = baseline %>% select(-sow,-ttf),
ntrees = 500
)
newdata <- df_wide_10 %>%
group_by(sow) %>%
filter(any(ttf < -50)) %>%
ungroup()
newdata$anomaly_score <- predict(
model,
newdata = newdata %>% select(-sow,-ttf)
)
ggplot(newdata,aes(x=ttf,y=anomaly_score))+
geom_point()+geom_smooth()+geom_vline(xintercept=-50)`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
nested_data <- newdata%>%
nest(data = -sow)
nested_data# A tibble: 9 × 2
sow data
<fct> <list>
1 4 <tibble [1,076 × 9]>
2 6 <tibble [1,076 × 9]>
3 8 <tibble [1,074 × 9]>
4 10 <tibble [1,085 × 9]>
5 12 <tibble [1,074 × 9]>
6 18 <tibble [1,076 × 9]>
7 22 <tibble [1,082 × 9]>
8 24 <tibble [1,083 × 9]>
9 26 <tibble [1,081 × 9]>
cuves <- nested_data %>%
mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
fitted = purrr::map(loess_fit, `[[`, "fitted")
)
results <- cuves %>%
dplyr::select(-loess_fit) %>%
tidyr::unnest(cols = c(data, fitted))
ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
#facet_wrap(~ sow, scales = "free_y")+
theme(legend.position = "none")find_max<-function(x,y){
min_pos<-x[which.max(y)]
return(min_pos)
}
mns_trained_on_all<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns_trained_on_all# A tibble: 9 × 2
sow mx_loc
<fct> <dbl>
1 4 -22.1
2 6 -14.7
3 8 -17.2
4 10 -16.0
5 12 -27.0
6 18 -2.82
7 22 -23.9
8 24 -17.8
9 26 -13.1
save(mns_trained_on_all,file="Z:/Isaac/Visual Features/1-5/peak table/trained_on_all.RData")ISOTREES for -50 and before as training
baseline_df <- df_wide_10 %>%
filter(between(ttf,-116.31051,-50)) %>%
ungroup()
colnames(baseline_df)[1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
model <- isolation.forest(
data = baseline_df %>% select(-sow,-ttf),
ntrees = 500
)
newdata <- df_wide_10
colnames(newdata)[1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
newdata$anomaly_score <- predict(
model,
newdata = newdata %>% select(-sow,-ttf)
)
ggplot(newdata,aes(x=ttf,y=anomaly_score))+
geom_point()+geom_smooth()+geom_vline(xintercept=-50)`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(newdata, aes(x = ttf, y = anomaly_score,color=sow)) +
# geom_point(size = 1, alpha = 0.5) +
geom_smooth(method = "loess" , se = FALSE) +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
#facet_wrap(~ sow, scales = "free_y") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
newdata_trained <- newdata %>%
group_by(sow) %>%
filter(any(ttf < -50)) %>%
ungroup()
newdata_untrained <- newdata %>%
group_by(sow) %>%
filter(!any(ttf < -50)) %>%
ungroup()ggplot(newdata_trained, aes(x = ttf, y = anomaly_score,color=sow)) +
# geom_point(size = 1, alpha = 0.5) +
geom_smooth(method = "loess" , se = FALSE) +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
#facet_wrap(~ sow, scales = "free_y") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(newdata_untrained, aes(x = ttf, y = anomaly_score,color=sow)) +
# geom_point(size = 1, alpha = 0.5) +
geom_smooth(method = "loess" , se = FALSE) +
#geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
#facet_wrap(~ sow, scales = "free_y") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
newdata_trained <- newdata_trained %>%
mutate(group = "Previously Seen Sows")
newdata_untrained <- newdata_untrained %>%
mutate(group = "Previously Unseen Sows")
combined <- bind_rows(newdata_trained, newdata_untrained)
colnames(combined) [1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
[10] "anomaly_score" "group"
ggplot(combined, aes(x = ttf, y = anomaly_score, color = sow)) +
geom_smooth(method = "loess", se = FALSE) +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
facet_wrap(~ group, scales = "fixed") `geom_smooth()` using formula = 'y ~ x'
nested_data <- newdata_trained%>%
nest(data = -sow)
nested_data# A tibble: 9 × 2
sow data
<fct> <list>
1 4 <tibble [1,076 × 10]>
2 6 <tibble [1,076 × 10]>
3 8 <tibble [1,074 × 10]>
4 10 <tibble [1,085 × 10]>
5 12 <tibble [1,074 × 10]>
6 18 <tibble [1,076 × 10]>
7 22 <tibble [1,082 × 10]>
8 24 <tibble [1,083 × 10]>
9 26 <tibble [1,081 × 10]>
cuves <- nested_data %>%
mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
fitted = purrr::map(loess_fit, `[[`, "fitted")
)
results <- cuves %>%
dplyr::select(-loess_fit) %>%
tidyr::unnest(cols = c(data, fitted))
ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
facet_wrap(~ sow, scales = "free_y")+
theme(legend.position = "none")find_max<-function(x,y){
min_pos<-x[which.max(y)]
return(min_pos)
}
mns<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns# A tibble: 9 × 2
sow mx_loc
<fct> <dbl>
1 4 -22.1
2 6 -14.7
3 8 -17.2
4 10 -16.0
5 12 -27.0
6 18 -2.82
7 22 -23.9
8 24 -17.8
9 26 -13.1
nested_data <- newdata_untrained%>%
nest(data = -sow)
nested_data# A tibble: 9 × 2
sow data
<fct> <list>
1 1 <tibble [296 × 10]>
2 3 <tibble [295 × 10]>
3 5 <tibble [297 × 10]>
4 9 <tibble [304 × 10]>
5 11 <tibble [295 × 10]>
6 13 <tibble [303 × 10]>
7 19 <tibble [300 × 10]>
8 23 <tibble [297 × 10]>
9 29 <tibble [441 × 10]>
cuves <- nested_data %>%
mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
fitted = purrr::map(loess_fit, `[[`, "fitted")
)
results <- cuves %>%
dplyr::select(-loess_fit) %>%
tidyr::unnest(cols = c(data, fitted))
ggplot(results, aes(x = ttf, y = anomaly_score,color=sow)) +
geom_point(aes(x = ttf, y = fitted),size = 1, alpha = 1,color="darkgray") +
geom_vline(xintercept = -50, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
facet_wrap(~ sow, scales = "free_y")+
theme(legend.position = "none")find_max<-function(x,y){
min_pos<-x[which.max(y)]
return(min_pos)
}
mns<-group_by(results,sow)%>%summarise(mx_loc=find_max(x=ttf,y=fitted))
mns# A tibble: 9 × 2
sow mx_loc
<fct> <dbl>
1 1 -18.2
2 3 -5.35
3 5 0.623
4 9 -35.9
5 11 35.6
6 13 -10.5
7 19 -1.72
8 23 -27.9
9 29 -6.97