options(warn = -1)Step_One__creating_dfs
set up
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lmerTest)Loading required package: lme4
Loading required package: Matrix
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modelr)
library(purrr)
library(emmeans)Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
library(writexl)
library(gt)
library(webshot2)
library(broom.mixed)
library(lubridate)Read in data
sowsraw <- read.csv("C:/Users/ipberg/OneDrive - Iowa State University/Documents/Code/Feature Analysis/18sows.csv",header = TRUE)change column structure
sowsraw$hour_of_day <- as.factor(sowsraw$hour_of_day)
sowsraw$sow <- as.factor(sowsraw$sow)
sowsraw$status <- as.factor(sowsraw$status)
sowsraw$X10min_group <- as.factor(sowsraw$X10min_group)
sowsraw$X20min_group <- as.factor(sowsraw$X20min_group)
sowsraw$X30min_group <- as.factor(sowsraw$X30min_group)
sowsraw$X60min_group <- as.factor(sowsraw$X60min_group)
sowsraw$date <- as.factor(sowsraw$date)
sowsraw$day_of_month <- as.factor(sowsraw$day_of_month)
sowsraw$month <- as.factor(sowsraw$month)
sowsraw$frame_datetime <- as.POSIXct(sowsraw$frame_datetime,
format = "%Y-%m-%d %H:%M:%OS",
tz = "UTC")Create df for before farrowing
sowsrawbf <- sowsraw %>% filter(status == "nfbf")creates long format df before farrowing df
library(tidyr)
library(dplyr)
df_longbf <- sowsrawbf %>%
pivot_longer(
cols = c(Area, Perimeter, Convex.Area, Convex.Perimeter,
Major.Axis.Length, Minor.Axis.Length, Width, Height,Centroid.X,Centroid.Y,Rightmost.X,Rightmost.Y,
Roundness, Elongation, Elasticity, Eccentricity, Concavity),
names_to = "feature",
values_to = "value"
) %>%
select(Index, sow, datetime = frame_datetime,Time..hours., feature, value) %>%
arrange(sow, feature, datetime)gets hour of day and window groups
create_sequential_time_groups <- function(df, datetime_col = "datetime",
interval_minutes = 10,
group_col_name = "time_group",
grouping_vars = c("sow", "feature")) {
library(dplyr)
library(lubridate)
df <- df %>%
group_by(across(all_of(grouping_vars))) %>%
mutate(
temp_time_group = floor_date(.data[[datetime_col]],
unit = paste0(interval_minutes, " minutes")),
!!group_col_name := as.integer(factor(temp_time_group))
) %>%
ungroup() %>%
select(-temp_time_group)
return(df)
}
get_hour_of_day <- function(datetime) {
as.integer(format(datetime, "%H"))
}adds new columns and change the column structure
df_longbf$datetime <- as.POSIXct(df_longbf$datetime,format = "%Y-%m-%d %H:%M:%OS",tz = "UTC")
df_longbf <- create_sequential_time_groups(df_longbf,interval_minutes = 10,group_col_name = "time_group_10")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 20,group_col_name = "time_group_20")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 30,group_col_name = "time_group_30")
df_longbf <-create_sequential_time_groups(df_longbf,interval_minutes = 60,group_col_name = "time_group_60")
df_longbf$time_group_10 <- as.factor(df_longbf$time_group_10)
df_longbf$time_group_20 <- as.factor(df_longbf$time_group_20)
df_longbf$time_group_30 <- as.factor(df_longbf$time_group_30)
df_longbf$time_group_60 <- as.factor(df_longbf$time_group_60)
df_longbf$feature <- as.factor(df_longbf$feature)
df_longbf$hour_of_day <- get_hour_of_day(df_longbf$datetime)
df_longbf$hour_of_day <- as.factor(df_longbf$hour_of_day)creates a data set for each window group organized by hour of day (uses the mode hour of day for the time groups)
# Function to calculate mode
get_mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
# Dataset 1: 10-minute time groups
df_10min <- df_longbf %>%
group_by(sow, feature, time_group_10) %>%
summarize(
datetime = first(datetime),
hour = get_mode(hour_of_day),
ttf = mean(Time..hours.),
mean_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
var_value = var(value, na.rm = TRUE),
n_obs = n(),
.groups = "drop"
)
# Dataset 2: 20-minute time groups
df_20min <- df_longbf %>%
group_by(sow, feature, time_group_20) %>%
summarize(
datetime = first(datetime),
hour = get_mode(hour_of_day),
ttf = mean(Time..hours.),
mean_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
var_value = var(value, na.rm = TRUE),
n_obs = n(),
.groups = "drop"
)
# Dataset 3: 30-minute time groups
df_30min <- df_longbf %>%
group_by(sow, feature, time_group_30) %>%
summarize(
datetime = first(datetime),
hour = get_mode(hour_of_day),
ttf = mean(Time..hours.),
mean_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
var_value = var(value, na.rm = TRUE),
n_obs = n(),
.groups = "drop"
)
# Dataset 4: 60-minute time groups
df_60min <- df_longbf %>%
group_by(sow, feature, time_group_60) %>%
summarize(
datetime = first(datetime),
hour = get_mode(hour_of_day),
ttf = mean(Time..hours.),
mean_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
var_value = var(value, na.rm = TRUE),
n_obs = n(),
.groups = "drop"
)save(df_60min,df_30min,df_20min,df_10min,file="Z:/Isaac/Visual Features/1-5/step1.RData")