step2

options(warn = -1)

set up

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lmerTest)

Loading required package: lme4

Loading required package: Matrix


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(modelr)
library(purrr)
library(emmeans)

Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(lubridate)

load(file="Z:/Isaac/Visual Features/1-5/step1.RData")

vars_from_long <- function(df){

  df <- df %>% mutate(hour = as.factor(hour))

  # nest everything except 'feature'
  nested_data <- df %>% nest(data = -feature)

  models <- nested_data %>%
    mutate(
      model_fit = map(data, ~ lmer(mean_value ~ hour - 1 + (1|sow), data = .x))
    )

  # glance per model
  model_summaries <- models %>%
    mutate(smr = map(model_fit, broom.mixed::glance)) %>%
    unnest(smr)

  # tidy per model
  fit_summary <- models %>%
    mutate(smr = map(model_fit, broom.mixed::tidy)) %>%
    unnest(smr)

  # augment per model
  augment_df <- models %>%
    mutate(aug = map2(model_fit, data, ~ broom.mixed::augment(.x, newdata = .y))) %>%
    select(feature, aug)   # keeps feature for identification

  return(list(
    fixed           = fit_summary %>% filter(effect=="fixed"),
    random          = fit_summary %>% filter(effect=="ran_pars"),
    model_summaries = model_summaries,
    augment         = augment_df
  ))
}

vars_from_long_var <- function(df){

  df <- df %>% mutate(hour = as.factor(hour))

  # nest everything except 'feature'
  nested_data <- df %>% nest(data = -feature)

  models <- nested_data %>%
    mutate(
      model_fit = map(data, ~ lmer(var_value ~ hour - 1 + (1|sow), data = .x))
    )

  # glance per model
  model_summaries <- models %>%
    mutate(smr = map(model_fit, broom.mixed::glance)) %>%
    unnest(smr)

  # tidy per model
  fit_summary <- models %>%
    mutate(smr = map(model_fit, broom.mixed::tidy)) %>%
    unnest(smr)

  # augment per model
  augment_df <- models %>%
    mutate(aug = map2(model_fit, data, ~ broom.mixed::augment(.x, newdata = .y))) %>%
    select(feature, aug)   # keeps feature for identification

  return(list(
    fixed           = fit_summary %>% filter(effect=="fixed"),
    random          = fit_summary %>% filter(effect=="ran_pars"),
    model_summaries = model_summaries,
    augment         = augment_df
  ))
}

created new nested dfs with fixed, random and summaries

df_10_min_vars <- vars_from_long(df_10min)
df_20_min_vars <- vars_from_long(df_20min)
df_30_min_vars <- vars_from_long(df_30min)
df_60_min_vars <- vars_from_long(df_60min)

aug_res_10 <- df_10_min_vars$augment %>% unnest(aug)
aug_res_20 <- df_20_min_vars$augment %>% unnest(aug)
aug_res_30 <- df_30_min_vars$augment %>% unnest(aug)
aug_res_60 <- df_60_min_vars$augment %>% unnest(aug)

aug_res_10$.resid <- aug_res_10$mean_value-aug_res_10$.fitted
aug_res_20$.resid <- aug_res_20$mean_value-aug_res_20$.fitted
aug_res_30$.resid <- aug_res_30$mean_value-aug_res_30$.fitted
aug_res_60$.resid <- aug_res_60$mean_value-aug_res_60$.fitted

df_10_min_vars_var <- vars_from_long_var(df_10min)
df_20_min_vars_var <- vars_from_long_var(df_20min)
df_30_min_vars_var <- vars_from_long_var(df_30min)
df_60_min_vars_var <- vars_from_long_var(df_60min)

aug_res_10_var <- df_10_min_vars_var$augment %>% unnest(aug)
aug_res_20_var <- df_20_min_vars_var$augment %>% unnest(aug)
aug_res_30_var <- df_30_min_vars_var$augment %>% unnest(aug)
aug_res_60_var <- df_60_min_vars_var$augment %>% unnest(aug)

aug_res_10_var$.resid <- aug_res_10_var$var_value-aug_res_10_var$.fitted
aug_res_20_var$.resid <- aug_res_20_var$var_value-aug_res_20_var$.fitted
aug_res_30_var$.resid <- aug_res_30_var$var_value-aug_res_30_var$.fitted
aug_res_60_var$.resid <- aug_res_60_var$var_value-aug_res_60_var$.fitted

creates df with the feature, sow variance and residual variance to then calculate the repeatability for each of the dfs from directly above for each of the time windows

df_10min_random_vars <- df_10_min_vars$random %>% 
  select(feature,group,estimate) %>% 
  pivot_wider(
    names_from=group,
    values_from = estimate
  )
df_20min_random_vars <- df_20_min_vars$random %>% 
  select(feature,group,estimate) %>% 
  pivot_wider(
    names_from=group,
    values_from = estimate
  )
df_30min_random_vars <- df_30_min_vars$random %>% 
  select(feature,group,estimate) %>% 
  pivot_wider(
    names_from=group,
    values_from = estimate
  )
df_60min_random_vars <- df_60_min_vars$random %>% 
  select(feature,group,estimate) %>% 
  pivot_wider(
    names_from=group,
    values_from = estimate
  )

adds a column to the above new dfs for repeatability and shows the table for each of the windows

df_10min_random_vars$repeatability <- df_10min_random_vars$sow/(df_10min_random_vars$sow+df_10min_random_vars$Residual)
df_20min_random_vars$repeatability <- df_20min_random_vars$sow/(df_20min_random_vars$sow+df_20min_random_vars$Residual)
df_30min_random_vars$repeatability <- df_30min_random_vars$sow/(df_30min_random_vars$sow+df_30min_random_vars$Residual)
df_60min_random_vars$repeatability <- df_60min_random_vars$sow/(df_60min_random_vars$sow+df_60min_random_vars$Residual)

savings dfs for step 2

save(
  df_10min_random_vars,df_20min_random_vars,df_30min_random_vars,df_60min_random_vars,aug_res_10_var,aug_res_20_var,aug_res_30_var,aug_res_60_var,aug_res_10,aug_res_20,aug_res_30,aug_res_60,file="Z:/Isaac/Visual Features/1-5/step2.RData"
)

set up

created new nested dfs with fixed, random and summaries

creates df with the feature, sow variance and residual variance to then calculate the repeatability for each of the dfs from directly above for each of the time windows

adds a column to the above new dfs for repeatability and shows the table for each of the windows

savings dfs for step 2

go to step3