October PIC_data

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(tidyverse)

-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr     1.1.2     v readr     2.1.4
v forcats   1.0.0     v stringr   1.5.0
v ggplot2   3.4.3     v tibble    3.2.1
v lubridate 1.9.2     v tidyr     1.3.0
v purrr     1.0.1     
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(readr)
library(DT)
library(ggplot2)
library(quantreg)

Loading required package: SparseM

Attaching package: 'SparseM'

The following object is masked from 'package:base':

    backsolve

#| warning: true
#| echo: true
#| label: data_prepataion 
rm(list = ls())
setwd("C:/Users/anune/OneDrive/Desktop/PIC_DataAnalysis_files")
data_PIC <- read.csv("PIC_65_FIRE.AN.1.csv")
data_PIC<- mutate(data_PIC,day_0 = dmy(START_DAY), 
       ENTRY_DATE = date(as.POSIXct(ENTRY_TIME, format= "%m/%d/%Y %H:%M")),
       DAYS_IN_FEED = as.numeric(ENTRY_DATE - day_0))
summary(data_PIC$DAYS_IN_FEED)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -364.0    14.0    31.0    31.2    48.0    68.0      25

data_PIC <- group_by(data_PIC, ID)

head(data_PIC)

# A tibble: 6 x 19
# Groups:   ID [3]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96251326    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     649
2 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~    1948
3 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     169
4 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     627
5 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     768
6 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     413
# i 9 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>

#| warning: true
#| echo: true
#| label: function_definition

perform_quantile_regression <- function(data_PIC, quantiles = quantiles)  {
  quantile_models <- rq(FEED_INTK ~ STAY_IN, data = data_PIC, tau = quantiles)
  storage_quantiles <- predict(quantile_models)
  colnames(storage_quantiles) <- c("Q1", "Q2", "Q3")
  outlier_band <- as.data.frame(storage_quantiles) %>%
    mutate(IQR = Q3 - Q1, LB = Q1 - 1.5 * IQR, UP = Q3 + 1.5 * IQR)
  id_band <- cbind(data_PIC, outlier_band) %>% mutate(ol = (FEED_INTK > UP) | (FEED_INTK < LB))
  return(id_band)
}
id_data1_outlier <- function(data1, quantiles = c(0.25, 0.75)) {
  id_data1_results <- data1 %>%
    mutate(Q1U = quantile(STAY_IN, probs = quantiles[1]),
           Q3U = quantile(STAY_IN, probs = quantiles[2]),
           IQRU = Q3U - Q1U,
           UBU = Q3U + 1.5 * IQRU,
           LBU = Q1U - 1.5 * IQRU,
           ol2 = (STAY_IN > UBU) | (STAY_IN < LBU))
  return(id_data1_results)
}

quantiles <- c(0.25, 0.5, 0.75)

storage_bygroup <- perform_quantile_regression(data_PIC, quantiles = quantiles)
head(storage_bygroup)

# A tibble: 6 x 26
# Groups:   ID [3]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96251326    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     649
2 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~    1948
3 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     169
4 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     627
5 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     768
6 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     413
# i 16 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>, Q1 <dbl>, Q2 <dbl>, Q3 <dbl>,
#   IQR <dbl>, LB <dbl>, UP <dbl>, ol <lgl>

storage_id_data1 <- id_data1_outlier(storage_bygroup, quantiles = c(0.25, 0.75))
head(storage_id_data1)

# A tibble: 6 x 32
# Groups:   ID [3]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96251326    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     649
2 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~    1948
3 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     169
4 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     627
5 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     768
6 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     413
# i 22 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>, Q1 <dbl>, Q2 <dbl>, Q3 <dbl>,
#   IQR <dbl>, LB <dbl>, UP <dbl>, ol <lgl>, Q1U <dbl>, Q3U <dbl>, IQRU <dbl>,
#   UBU <dbl>, LBU <dbl>, ol2 <lgl>

##ODP_full_data#

storage_id_data1= mutate(storage_id_data1, olf=ol|ol2, olc = ol*2 + ol2 )

head(storage_id_data1)

# A tibble: 6 x 34
# Groups:   ID [3]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96251326    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     649
2 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~    1948
3 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     169
4 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     627
5 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     768
6 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     413
# i 24 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>, Q1 <dbl>, Q2 <dbl>, Q3 <dbl>,
#   IQR <dbl>, LB <dbl>, UP <dbl>, ol <lgl>, Q1U <dbl>, Q3U <dbl>, IQRU <dbl>,
#   UBU <dbl>, LBU <dbl>, ol2 <lgl>, olf <lgl>, olc <dbl>

datatable(storage_id_data1)

Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

full_outliers <- storage_id_data1
table(storage_id_data1$olc)


     0      1      2      3 
111713    425   2108     17

outlier_frequencies <- table(storage_id_data1$olc)

outlier_percentages <- (outlier_frequencies / nrow(storage_id_data1)) * 100

outlier_summary <- data.frame(Frequencies = outlier_frequencies, Percentages = outlier_percentages)
print(outlier_summary)

  Frequencies.Var1 Frequencies.Freq Percentages.Var1 Percentages.Freq
1                0           111713                0      97.76830645
2                1              425                1       0.37194892
3                2             2108                2       1.84486667
4                3               17                3       0.01487796

425+2108+17

[1] 2550

2550/114263*100

[1] 2.231694

total_frequencies <- c(
  sum(storage_id_data1$ol > 0),
  sum(storage_id_data1$ol2 > 0),
  sum(storage_id_data1$olf > 0)
)
total_percentages <- (total_frequencies / nrow(storage_id_data1)) * 100


id_frequencies <- table(storage_id_data1$ID, storage_id_data1$olc)
id_percentages <- (id_frequencies / rowSums(id_frequencies)) * 100


total_outlier_summary <- data.frame(
  Outlier_Type = c("ol", "ol2", "olf"),
  Frequencies = total_frequencies,
  Percentages = total_percentages
)

total_outliers_count <- sum(storage_id_data1$ol > 0 | storage_id_data1$ol2 > 0 | storage_id_data1$olf > 0)
total_raw_data_count <- nrow(storage_id_data1)
total_outliers_percentage <- (total_outliers_count / total_raw_data_count) * 100

print(paste("Total Outliers Count:", total_outliers_count))

[1] "Total Outliers Count: 2550"

print(paste("Total Raw Data Count:", total_raw_data_count))

[1] "Total Raw Data Count: 114263"

print(paste("Total Outliers Percentage:", total_outliers_percentage))

[1] "Total Outliers Percentage: 2.23169354909288"

total_ids <- length(unique(storage_id_data1$ID))

# Calculate the percentages of each type of outlier per ID
percentages_per_id <- total_frequencies/total_ids

# Create a summary data frame
outlier_summary <- data.frame(
  Outlier_Type = c("ol", "ol2", "olf"),
  Percentage_Per_ID = percentages_per_id
)

# Print the summary
print(outlier_summary)

  Outlier_Type Percentage_Per_ID
1           ol         3.8777372
2          ol2         0.8065693
3          olf         4.6532847

You can add options to executable code like this

outlier_colors <- c("black", "green", "orange","pink")

my_plot <- storage_id_data1 %>%
  filter(ID %in% c("97257312", "96954301", "96587437", "97178318")) %>%  # Filter data for specific IDs
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Quantile regression on data with non-constant variance",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a quantile regression model"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
i Please use the `linewidth` argument instead.

my_plot

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

#Just ONE ID#

outlier_colors <- c("black", "green", "orange","pink")

my_plot2 <- storage_id_data1 %>%
  filter(ID == "97257312") %>%  # Filter data for the specific ID
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Quantile regression on data with non-constant variance",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a quantile regression model"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

my_plot2

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

outlier_colors <- c("black", "green", "orange", "pink")

my_plot3 <- storage_id_data1 %>%
  filter(ID == "96954301") %>%  # Filter data for the specific ID
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Quantile regression on data with non-constant variance",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a quantile regression model"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

my_plot3

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

ggsave("ggplot_FOUR_ID.png", plot = my_plot, width = 8, height = 6, dpi = 300)

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

ggsave("ggplot_ONE_ID.png", plot = my_plot3, width = 8, height = 6, dpi = 300)

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

#Progressive outlier detection

outliers_progressive = list()
j = 1
data_PIC_partial <- filter(data_PIC
                           , DAYS_IN_FEED <= 14)

storage_bygroup <- perform_quantile_regression(data_PIC_partial,
                                               quantiles = quantiles)

storage_id_data1 <- id_data1_outlier(storage_bygroup, quantiles = c(0.25, 0.75))


outliers_progressive[[j]]= storage_id_data1

for (i  in 15: 68) {
j = j+1

data_PIC_partial <- filter(data_PIC
                           , DAYS_IN_FEED <= i)

storage_bygroup <- perform_quantile_regression(data_PIC_partial,
                                               quantiles = quantiles)

storage_id_data1 <- id_data1_outlier(storage_bygroup, quantiles = c(0.25, 0.75))
storage_id_data1 <- filter(storage_id_data1, DAYS_IN_FEED == i)

outliers_progressive[[j]]= storage_id_data1

} 
storage_id_data1 <- bind_rows(outliers_progressive)
dim (storage_id_data1)

[1] 114238     32

length (storage_id_data1)

[1] 32

head(storage_id_data1)

# A tibble: 6 x 32
# Groups:   ID [3]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96251326    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     649
2 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~    1948
3 96251327    65 91032775 9.22e7 7.80e7 B0113   774 10/27/202~ 10/27/20~     169
4 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     627
5 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     768
6 96284921    65 90218081 9.02e7 7.81e7 B0113   774 10/27/202~ 10/27/20~     413
# i 22 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>, Q1 <dbl>, Q2 <dbl>, Q3 <dbl>,
#   IQR <dbl>, LB <dbl>, UP <dbl>, ol <lgl>, Q1U <dbl>, Q3U <dbl>, IQRU <dbl>,
#   UBU <dbl>, LBU <dbl>, ol2 <lgl>

tail(storage_id_data1)

# A tibble: 6 x 32
# Groups:   ID [4]
        ID  LINE     SIRE    DAM LITTER PEN    FARM ENTRY_TIME EXIT_TIME STAY_IN
     <int> <int>    <int>  <int>  <int> <chr> <int> <chr>      <chr>       <int>
1 96587429    65 87591163 8.92e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~    1152
2 96587429    65 87591163 8.92e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~     360
3 96600204    65 92013924 8.94e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~     465
4 96600204    65 92013924 8.94e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~      26
5 96601076    65 92013924 9.11e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~    1529
6 96601065    65 92013924 8.82e7 7.82e7 B0506   774 1/30/2023~ 1/30/202~      70
# i 22 more variables: FEED_INTK <int>, ENTRY_WT <int>, EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, OFFTEST_DAY <chr>, day_0 <date>,
#   ENTRY_DATE <date>, DAYS_IN_FEED <dbl>, Q1 <dbl>, Q2 <dbl>, Q3 <dbl>,
#   IQR <dbl>, LB <dbl>, UP <dbl>, ol <lgl>, Q1U <dbl>, Q3U <dbl>, IQRU <dbl>,
#   UBU <dbl>, LBU <dbl>, ol2 <lgl>

#| warning: true
#| echo: true
#| label:progressive outlier summary

storage_id_data1= mutate(storage_id_data1, olf=ol|ol2, olc = ol*2 + ol2 )

table(storage_id_data1$olc)


     0      1      2      3 
106685    894   6616     43

outlier_frequencies <- table(storage_id_data1$olc)

outlier_percentages <- (outlier_frequencies / nrow(storage_id_data1)) * 100

outlier_summary <- data.frame(Frequencies = outlier_frequencies, Percentages = outlier_percentages)
print(outlier_summary)

  Frequencies.Var1 Frequencies.Freq Percentages.Var1 Percentages.Freq
1                0           106685                0      93.38836464
2                1              894                1       0.78257673
3                2             6616                2       5.79141792
4                3               43                3       0.03764071

total_frequencies <- c(
  sum(storage_id_data1$ol > 0),
  sum(storage_id_data1$ol2 > 0),
  sum(storage_id_data1$olf > 0)
)
total_frequencies

[1] 6659  937 7553

total_percentages <- (total_frequencies / nrow(storage_id_data1)) * 100


id_frequencies <- table(storage_id_data1$ID, storage_id_data1$olc)
id_percentages <- (id_frequencies / rowSums(id_frequencies)) * 100


total_outlier_summary <- data.frame(
  Outlier_Type = c("ol", "ol2", "olf"),
  Frequencies = total_frequencies,
  Percentages = total_percentages
)

total_outliers_count <- sum(storage_id_data1$ol > 0 | storage_id_data1$ol2 > 0 | storage_id_data1$olf > 0)
total_raw_data_count <- nrow(storage_id_data1)
total_outliers_percentage <- (total_outliers_count / total_raw_data_count) * 100

print(paste("Total Outliers Count:", total_outliers_count))

[1] "Total Outliers Count: 7553"

print(paste("Total Raw Data Count:", total_raw_data_count))

[1] "Total Raw Data Count: 114238"

print(paste("Total Outliers Percentage:", total_outliers_percentage))

[1] "Total Outliers Percentage: 6.61163535776187"

total_ids <- length(unique(storage_id_data1$ID))

# Calculate the percentages of each type of outlier per ID
percentages_per_id <- total_frequencies/total_ids

# Create a summary data frame
outlier_summary <- data.frame(
  Outlier_Type = c("ol", "ol2", "olf"),
  Percentage_Per_ID = percentages_per_id
)

# Print the summary
print(outlier_summary)

  Outlier_Type Percentage_Per_ID
1           ol         12.151460
2          ol2          1.709854
3          olf         13.782847

outlier_colors <- c("black", "green", "orange","pink")

my_plot4 <- storage_id_data1 %>%
  filter(ID %in% c("97257312", "96954301", "96587437", "97178318")) %>%  # Filter data for specific IDs
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  #geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  #geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Progressive outlier detection",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a outlier detection from 14 to 68 days in feed"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

my_plot4

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

#Just ONE ID#

outlier_colors <- c("black", "green", "orange","pink")

my_plot5 <- storage_id_data1 %>%
  filter(ID == "97257312") %>%  # Filter data for the specific ID
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  #geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  #geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Progressive outlier detection",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a outlier detection from 14 to 68 days in feed"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

my_plot5

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

outlier_colors <- c("black", "green", "orange", "pink")

my_plot6 <- storage_id_data1 %>%
  filter(ID == "96954301") %>%  # Filter data for the specific ID
  ggplot(aes(x = STAY_IN, y = FEED_INTK, color = factor(olc))) +
  geom_point() + 
  #geom_point(size = 0.5, aes(x = STAY_IN, y = UP), color = "red") +
  #geom_point(size = 0.5, aes(x = STAY_IN, y = LB), color = "red") +
  geom_quantile(quantiles = 0.25, color = "blue") +
  geom_quantile(quantiles = 0.50, color = "purple") +
  geom_quantile(quantiles = 0.75, color = "cyan4") +
  scale_color_manual(values = outlier_colors) + 
  facet_wrap(~ID) +
  theme_light() +
  labs(
    title = "Progressive outlier detection",
    subtitle = "Ames, Iowa, October 2023",
    x = "Stay In time at the feeders",
    y = "Feed Intake",
    caption = "Example of phenotypic data performing a outlier detection from 14 to 68 days in feed"
  ) +
  theme(
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8, color = "darkgray"),
    axis.ticks = element_line(size = 3),
    plot.title = element_text(size = 20, color = "darkblue")
  )

my_plot6

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

The echo: false option disables the printing of code (only output is displayed).