Untitled

options(warn = -1)

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lmerTest)

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelr)
library(purrr)
library(emmeans)

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(ggplot2)
library(isotree)
library(tidyr)
library(ggforce)


load("Z:/Isaac/Visual Features/1-5/step1.RData")

# Need to create a new wide df


features_to_use_mean <- c("Rightmost.X", "Width")

df_subset_mean <- df_10min_raw %>%
  select(sow, ttf, feature, mean_value)

df_wide_mean <- df_subset_mean %>%
  filter(feature %in% features_to_use_mean) %>%
  pivot_wider(
    names_from  = feature,
    values_from = mean_value,
    names_glue  = "{feature}_mean"
  ) %>% 
  select(sow,ttf,Width_mean,Rightmost.X_mean)

features_to_use_var <- c("Centroid.X", "Height","Major.Axis.Length","Rightmost.X","Width")

df_subset_var <- df_10min_raw %>%
  select(sow, ttf, feature, var_value)

df_wide_var <- df_subset_var %>%
  filter(feature %in% features_to_use_var) %>%
  pivot_wider(
    names_from  = feature,
    values_from = var_value,
    names_glue  = "{feature}_var"
  ) %>% 
  select(sow,ttf,Centroid.X_var, Height_var,Major.Axis.Length_var,Rightmost.X_var,Width_var)

df_wide_10 <- df_wide_mean %>% 
  left_join(df_wide_var,by=c("sow","ttf"))

colnames((df_wide_10))

[1] "sow"                   "ttf"                   "Width_mean"           
[4] "Rightmost.X_mean"      "Centroid.X_var"        "Height_var"           
[7] "Major.Axis.Length_var" "Rightmost.X_var"       "Width_var"

sow_12 <- df_wide_10 %>% 
    filter(sow == 12)

# Baseline
baseline12 <- sow_12 %>% 
  filter(between(ttf, -120, -50)) %>% 
  ungroup()

library(dplyr)
library(tidyr)
library(purrr)
# df with all sows with greater than 50 hours recording before farrowing
sows_to_keep <- df_wide_10 %>%
  group_by(sow) %>%
  filter(any(ttf < -50)) %>%
  ungroup()

# Nests data by sow
nesteddf <- sows_to_keep %>%
  nest(full = -sow)

# creates a new nested column for the baseline for each sow for -120 to -50
nesteddf <- nesteddf %>%
  mutate(
    baseline = map(
      full,
      ~ .x %>% filter(between(ttf, -120, -50))
    )
  )

# creates another column just for the model for the iso tree and applies the model to the data for each sow to have a trained model for each sow individually
nesteddf <- nesteddf %>%
  mutate(
    iso_model = map(
      baseline,
      ~ isolation.forest(data = .x %>% select(-ttf), ntrees = 500)
    )
  )
hour_cutoffs <- seq(-119, 120, by = 1)


nesteddf <- nesteddf %>%
  mutate(
    hourly_results = map2(
      full,
      iso_model,
      ~ {
        full_data <- .x # calls the data
        model     <- .y # calls the model

        map_dfr(hour_cutoffs, function(cutoff) {

          projected <- full_data %>%
            filter(ttf >= -120, ttf <= 120)

          if (nrow(projected) == 0) return(tibble())

          # Predict anomaly scores
          scores <- predict(model, newdata = projected %>% select(-ttf))
          projected <- projected %>% mutate(anomaly_score = scores)

          # Only fit loess if there are enough points
          if (nrow(projected) >= 5) {   # loess needs at least ~5 points for span=0.75
            loess_mod <- loess(anomaly_score ~ ttf, data = projected, span = 0.75)
            loess_fit <- predict(loess_mod, newdata = projected)
          } else {
            loess_fit <- NA_real_  # fallback
          }

          projected %>%
            mutate(
              loess_fit = loess_fit,
              cutoff_hour = cutoff
            )
        })
      }
    )
  )


# Loop through each sow
for (i in seq_len(nrow(nesteddf))) {
  sow_id <- nesteddf$sow[i]
  df_plot <- nesteddf$hourly_results[[i]]
  
  # Skip if the hourly_results is empty
  if (nrow(df_plot) == 0) next
  
  p <- ggplot(df_plot, aes(x = ttf,y=loess_fit)) +
    geom_smooth()+
    # geom_point(aes(y = anomaly_score), color = "blue", alpha = 0.5) +
    # geom_line(aes(y = loess_fit), color = "red", size = 1) +
    labs(
      title = paste("Sow", sow_id),
      x = "TTF",
      y = "Anomaly Score / Loess Fit"
    ) +
    theme_minimal(base_size = 14)
  
  print(p)
}

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

nested_data <- nesteddf %>% 
  select(sow, hourly_results) %>%
  unnest(hourly_results)  

# cuves <- nested_data %>%
#   mutate(loess_fit = map(data, loess,formula=anomaly_score~ttf,span=0.75),
#                          fitted = purrr::map(loess_fit, `[[`, "fitted")
#                          )

# results <- cuves %>%
#   dplyr::select(-loess_fit) %>% 
#   tidyr::unnest(cols = c(data, fitted))

find_max<-function(x,y){
  min_pos<-x[which.max(y)]
  return(min_pos)
}



mns_trained_on_itself_only <- nested_data %>%
  group_by(sow) %>%
  summarise(mx_loc = find_max(x = ttf, y = loess_fit), .groups = 'drop')

mns_trained_on_itself_only

# A tibble: 9 × 2
  sow   mx_loc
  <fct>  <dbl>
1 4     -22.7 
2 6     -14.2 
3 8     -11.4 
4 10    -12.1 
5 12    -26.0 
6 18     -2.98
7 22    -23.5 
8 24    -15.9 
9 26     -6.73

save(mns_trained_on_itself_only,file="Z:/Isaac/Visual Features/1-5/peak table/trained_individually.RData")