Step_One__creating_dfs

Author

Isaac Berg

options(warn = -1)

set up

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lmerTest)
Loading required package: lme4
Loading required package: Matrix

Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':

    lmer
The following object is masked from 'package:stats':

    step
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.1     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(lubridate)

Read in data

sowsraw <- read.csv("C:/Users/ipberg/OneDrive - Iowa State University/Documents/Code/Feature Analysis/18sows.csv",header = TRUE)

change column structure

sowsraw$hour_of_day <- as.factor(sowsraw$hour_of_day)
sowsraw$sow <- as.factor(sowsraw$sow)
sowsraw$status <- as.factor(sowsraw$status)
sowsraw$X10min_group <- as.factor(sowsraw$X10min_group)
sowsraw$X20min_group <- as.factor(sowsraw$X20min_group)
sowsraw$X30min_group <- as.factor(sowsraw$X30min_group)
sowsraw$X60min_group <- as.factor(sowsraw$X60min_group)
sowsraw$date <- as.factor(sowsraw$date)
sowsraw$day_of_month <- as.factor(sowsraw$day_of_month)
sowsraw$month <- as.factor(sowsraw$month)
sowsraw$frame_datetime <- as.POSIXct(sowsraw$frame_datetime,
                                format = "%Y-%m-%d %H:%M:%OS",
                                tz = "UTC")

Create df for before farrowing

sowsrawbf <- sowsraw %>% filter(status == "nfbf")

creates long format df before farrowing df

library(tidyr)
library(dplyr)

df_longbf <- sowsrawbf %>%
  pivot_longer(
    cols = c(Area, Perimeter, Convex.Area, Convex.Perimeter, 
             Major.Axis.Length, Minor.Axis.Length, Width, Height,Centroid.X,Centroid.Y,Rightmost.X,Rightmost.Y,
             Roundness, Elongation, Elasticity, Eccentricity, Concavity),
    names_to = "feature",
    values_to = "value"
  ) %>%
  select(Index, sow, datetime = frame_datetime,Time..hours., feature, value) %>%
  arrange(sow, feature, datetime)

gets hour of day and window groups

create_sequential_time_groups <- function(df, datetime_col = "datetime", 
                                          interval_minutes = 10, 
                                          group_col_name = "time_group",
                                          grouping_vars = c("sow", "feature")) {
  library(dplyr)
  library(lubridate)
  
  df <- df %>%
    group_by(across(all_of(grouping_vars))) %>%
    mutate(
      temp_time_group = floor_date(.data[[datetime_col]], 
                                    unit = paste0(interval_minutes, " minutes")),
      !!group_col_name := as.integer(factor(temp_time_group))
    ) %>%
    ungroup() %>%
    select(-temp_time_group)
  
  return(df)
}

get_hour_of_day <- function(datetime) {
  as.integer(format(datetime, "%H"))
}

adds new columns and change the column structure

df_longbf$datetime <- as.POSIXct(df_longbf$datetime,format = "%Y-%m-%d %H:%M:%OS",tz = "UTC")

df_longbf <- create_sequential_time_groups(df_longbf,interval_minutes = 10,group_col_name = "time_group_10")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 20,group_col_name = "time_group_20")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 30,group_col_name = "time_group_30")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 60,group_col_name = "time_group_60")

df_longbf$time_group_10 <- as.factor(df_longbf$time_group_10)
df_longbf$time_group_20 <- as.factor(df_longbf$time_group_20)
df_longbf$time_group_30 <- as.factor(df_longbf$time_group_30)
df_longbf$time_group_60 <- as.factor(df_longbf$time_group_60)
df_longbf$feature <- as.factor(df_longbf$feature)

df_longbf$hour_of_day <- get_hour_of_day(df_longbf$datetime)
df_longbf$hour_of_day <- as.factor(df_longbf$hour_of_day)

creates a data set for each window group organized by hour of day (uses the mode hour of day for the time groups)

# Function to calculate mode
get_mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Dataset 1: 10-minute time groups
df_10min <- df_longbf %>%
  group_by(sow, feature, time_group_10) %>%
  summarize(
    datetime = first(datetime),
    hour = get_mode(hour_of_day),
    ttf = mean(Time..hours.),
    mean_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    n_obs = n(),
    .groups = "drop"
  )

# Dataset 2: 20-minute time groups
df_20min <- df_longbf %>%
  group_by(sow, feature, time_group_20) %>%
  summarize(
    datetime = first(datetime),
    hour = get_mode(hour_of_day),
    ttf = mean(Time..hours.),
    mean_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    n_obs = n(),
    .groups = "drop"
  )

# Dataset 3: 30-minute time groups
df_30min <- df_longbf %>%
  group_by(sow, feature, time_group_30) %>%
  summarize(
    datetime = first(datetime),
    hour = get_mode(hour_of_day),
    ttf = mean(Time..hours.),
    mean_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    n_obs = n(),
    .groups = "drop"
  )

# Dataset 4: 60-minute time groups
df_60min <- df_longbf %>%
  group_by(sow, feature, time_group_60) %>%
  summarize(
    datetime = first(datetime),
    hour = get_mode(hour_of_day),
    ttf = mean(Time..hours.),
    mean_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE),
    var_value = var(value, na.rm = TRUE),
    n_obs = n(),
    .groups = "drop"
  )
save(df_60min,df_30min,df_20min,df_10min,file="Z:/Isaac/Visual Features/1-5/step1.RData")

go to step2