options(warn = -1)Untitled
Removing excess warnings from plots
Loading Libraries
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lmerTest)Loading required package: lme4
Loading required package: Matrix
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)
library(plotrix)Loading in Data
load("Z:/Isaac/Visual Features/1-5/step1.RData")creating a wide df for all sows for selected features
# Need to create a new wide df
features_to_use_mean <- c("Rightmost.X", "Width")
df_subset_mean <- df_10min_raw %>%
select(sow, ttf, feature, mean_value)
df_wide_mean <- df_subset_mean %>%
filter(feature %in% features_to_use_mean) %>%
pivot_wider(
names_from = feature,
values_from = mean_value,
names_glue = "{feature}_mean"
) %>%
select(sow,ttf,Width_mean,Rightmost.X_mean)
features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")
df_subset_var <- df_10min_raw %>%
select(sow, ttf, feature, var_value)
df_wide_var <- df_subset_var %>%
filter(feature %in% features_to_use_var) %>%
pivot_wider(
names_from = feature,
values_from = var_value,
names_glue = "{feature}_var"
) %>%
select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)
df_wide_10 <- df_wide_mean %>%
left_join(df_wide_var,by=c("sow","ttf"))
colnames((df_wide_10))[1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
defining function to get ttf value at peak
find_max<-function(x,y){
min_pos<-x[which.max(y)]
return(min_pos)
}Nesting df per sow
keeps sows with greater than 50 hours of recording before farrowing
nested_df_of_selected <- df_wide_10 %>%
group_by(sow) %>%
filter(any(ttf < -50)) %>%
ungroup()Nests by sow and has a full tibble (for testing) and a baseline tibble (for training)
nested_df_of_selected <- nested_df_of_selected %>%
nest(full = -sow) %>%
mutate(
baseline=map(full,~.x %>% filter(between(ttf, -120,-50)))
)creates a new column for a model for each sow
nested_df_of_selected <- nested_df_of_selected %>%
mutate(
iso_model = map(baseline, ~isolation.forest(data=.x %>% select(-ttf),ntrees = 500))
)Scores data based on model trained on baseline
nested_df_of_selected <- nested_df_of_selected %>%
mutate(
full_scored = map2(
full,
iso_model,
~ .x %>%
mutate(
anomaly_score = predict(
.y,
newdata = .x %>% select(-ttf)
)
)
)
)applies loess function to get fitted values on anomaly scores
nested_df_of_selected <- nested_df_of_selected %>%
mutate(
full_loess = map(
full_scored,
~ {
lo <- loess(anomaly_score ~ ttf, data = .x, span = 0.75)
.x %>% mutate(fitted = lo$fitted)
}
)
)obtaining peak location
nested_df_of_selected <- nested_df_of_selected %>%
mutate(
summary_stats = map(
full_loess,
~ .x %>%
summarise(
peak_loc = ttf[which.max(fitted)],
peak_value = max(fitted, na.rm=TRUE)
)
)
)creating a table for the peaks and the values of the peaks from the fitted values
ind_peak <- nested_df_of_selected %>%
unnest(summary_stats) %>%
select(sow,peak_loc,peak_value)obtaining average ttf at peak and peak value with standard error
ind_peak_summary <- tibble(
Metric = c("Peak TTF", "Peak Value"),
Mean = c(
mean(ind_peak$peak_loc, na.rm = TRUE),
mean(ind_peak$peak_value, na.rm = TRUE)
),
SE = c(
std.error(ind_peak$peak_loc),
std.error(ind_peak$peak_value)
)
)
ind_peak_summary# A tibble: 2 × 3
Metric Mean SE
<chr> <dbl> <dbl>
1 Peak TTF -15.1 2.61
2 Peak Value 0.488 0.00458
plotting per sow
naming averages to use in plots
avg_peak_ttf <- ind_peak_summary %>%
filter(Metric == "Peak TTF") %>%
pull(Mean)
avg_peak_val <- ind_peak_summary %>%
filter(Metric == "Peak Value") %>%
pull(Mean)Adding peak data into nested tibble
nested_df_of_selected <- nested_df_of_selected %>%
unnest(summary_stats)creating plots per sow
nested_df_of_selected <- nested_df_of_selected %>%
mutate(
plot = pmap(
list(full_loess, peak_loc, sow),
~ ggplot(..1, aes(ttf, fitted)) +
geom_line(aes(color="Fitted Curve"),size = 1) +
geom_vline(aes(xintercept = -50, color = "-50 TTF"),
linetype = "dashed", linewidth = 1) +
geom_vline(aes(xintercept = ..2, color = "Individual Peak"),
linewidth = 1) +
geom_vline(aes(xintercept = avg_peak_ttf,
color = "Average Peak TTF"),
linewidth = 1) +
geom_hline(aes(yintercept = avg_peak_val,
color = "Average Peak Value"),
linewidth = 1) +
labs(
title = paste0(
"Sow ", ..3,
" | Peak TTF: ", round(..2, 1)
),
x = "Time to Farrowing (ttf)",
y = "LOESS Fitted Anomaly Score"
) +
theme_minimal()
)
)printing all plots
walk(nested_df_of_selected$plot, print)