Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Loading required package: lme4
Loading required package: Matrix
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)
# Need to create a new wide df
features_to_use_mean <- c("Rightmost.X", "Width")
df_subset_mean <- df_10min_raw %>%
select(sow, ttf, feature, mean_value)
df_wide_mean <- df_subset_mean %>%
filter(feature %in% features_to_use_mean) %>%
pivot_wider(
names_from = feature,
values_from = mean_value,
names_glue = "{feature}_mean"
) %>%
select(sow,ttf,Width_mean,Rightmost.X_mean)
features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")
df_subset_var <- df_10min_raw %>%
select(sow, ttf, feature, var_value)
df_wide_var <- df_subset_var %>%
filter(feature %in% features_to_use_var) %>%
pivot_wider(
names_from = feature,
values_from = var_value,
names_glue = "{feature}_var"
) %>%
select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)
df_wide_10 <- df_wide_mean %>%
left_join(df_wide_var,by=c("sow","ttf"))
colnames((df_wide_10))
[1] "sow" "ttf" "Width_mean"
[4] "Rightmost.X_mean" "Centroid.X_var" "Height_var"
[7] "Major.Axis.Length_var" "Rightmost.X_var" "Width_var"
sowid <- c(4,6,8,10,12,18,22,24,26)
for (i in sowid){
sow12 <- df_wide_10 %>%
filter(sow == i)
# Baseline
baseline12 <- sow12 %>%
filter(between(ttf, -120, -50)) %>%
ungroup()
# Isolation Forest
model12 <- isolation.forest(
data = baseline12 %>% select(-sow, -ttf),
ntrees = 500
)
# Predict
newdata12 <- sow12
newdata12$anomaly_score <- predict(
model12,
newdata = newdata12 %>% select(-sow, -ttf)
)
# Plot
print(
ggplot(newdata12, aes(ttf, anomaly_score)) +
geom_point() +
geom_smooth() +
geom_vline(xintercept = -50)
)
# LOESS (simplified)
lo <- loess(anomaly_score ~ ttf, data = newdata12, span = 0.75)
newdata12$fitted <- lo$fitted
# Hour binning
df_fitted <- newdata12 %>%
mutate(ttf_hour = floor(ttf)) %>%
filter(ttf_hour >= -120 & ttf_hour <= 0)
# Hourly summary
hourly_summary <- df_fitted %>%
group_by(ttf_hour) %>%
summarise(mean_fitted = mean(fitted, na.rm = TRUE)) %>%
arrange(ttf_hour) %>%
mutate(
change_from_prev = mean_fitted - lag(mean_fitted),
trend = case_when(
change_from_prev > 0 ~ "higher",
change_from_prev < 0 ~ "lower",
TRUE ~ "same"
)
)
# Max anomaly location
mns <- df_fitted %>%
summarise(mx_loc = ttf[which.max(fitted)])
print(mns)
print(hourly_summary)
}
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -22.7
# A tibble: 114 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -113 0.408 NA same
2 -112 0.408 0.0000781 higher
3 -111 0.408 0.000156 higher
4 -110 0.408 0.000166 higher
5 -109 0.408 0.000176 higher
6 -108 0.409 0.000188 higher
7 -107 0.409 0.000199 higher
8 -106 0.409 0.000211 higher
9 -105 0.409 0.000224 higher
10 -104 0.409 0.000237 higher
# ℹ 104 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -14.2
# A tibble: 106 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -105 0.412 NA same
2 -104 0.412 -0.000408 lower
3 -103 0.411 -0.000428 lower
4 -102 0.411 -0.000404 lower
5 -101 0.411 -0.000380 lower
6 -100 0.410 -0.000354 lower
7 -99 0.410 -0.000328 lower
8 -98 0.410 -0.000301 lower
9 -97 0.409 -0.000274 lower
10 -96 0.409 -0.000245 lower
# ℹ 96 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -11.4
# A tibble: 114 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -113 0.411 NA same
2 -112 0.411 -0.000670 lower
3 -111 0.410 -0.000876 lower
4 -110 0.409 -0.000836 lower
5 -109 0.408 -0.000795 lower
6 -108 0.408 -0.000754 lower
7 -107 0.407 -0.000713 lower
8 -106 0.406 -0.000672 lower
9 -105 0.406 -0.000630 lower
10 -104 0.405 -0.000589 lower
# ℹ 104 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -12.1
# A tibble: 106 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -105 0.408 NA same
2 -104 0.408 -0.000197 lower
3 -103 0.407 -0.000253 lower
4 -102 0.407 -0.000230 lower
5 -101 0.407 -0.000206 lower
6 -100 0.407 -0.000181 lower
7 -99 0.407 -0.000155 lower
8 -98 0.407 -0.000128 lower
9 -97 0.406 -0.000100 lower
10 -96 0.406 -0.0000711 lower
# ℹ 96 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -26.0
# A tibble: 118 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -117 0.400 NA same
2 -116 0.399 -0.0000748 lower
3 -115 0.399 -0.000100 lower
4 -114 0.399 -0.0000817 lower
5 -113 0.399 -0.0000628 lower
6 -112 0.399 -0.0000435 lower
7 -111 0.399 -0.0000239 lower
8 -110 0.399 -0.00000375 lower
9 -109 0.399 0.0000168 higher
10 -108 0.399 0.0000377 higher
# ℹ 108 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -2.98
# A tibble: 116 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -115 0.397 NA same
2 -114 0.397 -0.000497 lower
3 -113 0.396 -0.000584 lower
4 -112 0.396 -0.000559 lower
5 -111 0.395 -0.000533 lower
6 -110 0.395 -0.000509 lower
7 -109 0.394 -0.000485 lower
8 -108 0.394 -0.000461 lower
9 -107 0.393 -0.000437 lower
10 -106 0.393 -0.000414 lower
# ℹ 106 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -23.5
# A tibble: 115 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -114 0.409 NA same
2 -113 0.407 -0.00177 lower
3 -112 0.405 -0.00189 lower
4 -111 0.404 -0.00183 lower
5 -110 0.402 -0.00177 lower
6 -109 0.400 -0.00172 lower
7 -108 0.398 -0.00166 lower
8 -107 0.397 -0.00160 lower
9 -106 0.395 -0.00153 lower
10 -105 0.394 -0.00147 lower
# ℹ 105 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -15.9
# A tibble: 108 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -107 0.390 NA same
2 -106 0.390 -0.000475 lower
3 -105 0.389 -0.000614 lower
4 -104 0.388 -0.000583 lower
5 -103 0.388 -0.000552 lower
6 -102 0.387 -0.000520 lower
7 -101 0.387 -0.000487 lower
8 -100 0.386 -0.000454 lower
9 -99 0.386 -0.000420 lower
10 -98 0.386 -0.000385 lower
# ℹ 98 more rows
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# A tibble: 1 × 1
mx_loc
<dbl>
1 -6.73
# A tibble: 95 × 4
ttf_hour mean_fitted change_from_prev trend
<dbl> <dbl> <dbl> <chr>
1 -94 0.373 NA same
2 -93 0.373 0.000375 higher
3 -92 0.374 0.000466 higher
4 -91 0.374 0.000478 higher
5 -90 0.375 0.000492 higher
6 -89 0.375 0.000508 higher
7 -88 0.376 0.000526 higher
8 -87 0.376 0.000547 higher
9 -86 0.377 0.000569 higher
10 -85 0.377 0.000593 higher
# ℹ 85 more rows