PIC.asreml4.2

Quarto

library(tidyr)
library(broom)
library(lme4)

Loading required package: Matrix


Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(lme4)
library(lmerTest)


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(emmeans)
library(car)

Loading required package: carData


Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ Matrix::expand() masks tidyr::expand()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
✖ Matrix::pack()   masks tidyr::pack()
✖ car::recode()    masks dplyr::recode()
✖ purrr::some()    masks car::some()
✖ Matrix::unpack() masks tidyr::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(corrplot)

corrplot 0.92 loaded

library(RColorBrewer)
library(ggplot2)
library(MASS)


Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select

library(agricolae)
library(vegan)

Loading required package: permute
Loading required package: lattice
This is vegan 2.6-4

library(dplyr)
library(readr)
library(DT)
library(ggplot2)
library(quantreg)

Loading required package: SparseM

Attaching package: 'SparseM'

The following object is masked from 'package:base':

    backsolve

library(broom.mixed)
library(pedigree)
library(pedigreemm)
library(pedtools)

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Reading data

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

rm(list = ls())

setwd("C:\\Users\\anunez\\OneDrive - Iowa State University\\Desktop\\PIC_DataAnalysis_files")

data_PIC <- read.csv("FINAL_FIRE_2019_2023_AN.csv")

summary(data_PIC)

       ID                 LINE    PED_IDENT_SIRE     PED_IDENT_DAM     
 Min.   : 80406526   Min.   :65   Min.   :72321895   Min.   :70838758  
 1st Qu.: 84007778   1st Qu.:65   1st Qu.:79257130   1st Qu.:79153747  
 Median : 88690351   Median :65   Median :83589571   Median :84025461  
 Mean   : 89253749   Mean   :65   Mean   :83724294   Mean   :83988626  
 3rd Qu.: 93986447   3rd Qu.:65   3rd Qu.:87591160   3rd Qu.:88191389  
 Max.   :100603759   Max.   :65   Max.   :95485445   Max.   :96751344  
 LIT_LITTER_ID          PEN              TEST_FARM    ENTRY_TIME       
 Min.   :68967039   Length:1048575     Min.   :774   Length:1048575    
 1st Qu.:71920248   Class :character   1st Qu.:774   Class :character  
 Median :74483118   Mode  :character   Median :774   Mode  :character  
 Mean   :74566106                      Mean   :774                     
 3rd Qu.:77074566                      3rd Qu.:774                     
 Max.   :80351636                      Max.   :774                     
  EXIT_TIME            STAY_IN            FEED_INTK       FEEDER_ENTRY_WT
 Length:1048575     Min.   :        2   Min.   :-3892.0   Min.   :-993   
 Class :character   1st Qu.:      513   1st Qu.:  231.0   1st Qu.: 786   
 Mode  :character   Median :     1103   Median :  521.0   Median :1051   
                    Mean   :     3582   Mean   :  575.4   Mean   :1106   
                    3rd Qu.:     1757   3rd Qu.:  848.0   3rd Qu.:1399   
                    Max.   :665125948   Max.   : 9280.0   Max.   :9282   
 FEEDER_EXIT_WT     FEEDER_NO      START_DAY           END_DAY         
 Min.   :-993.0   Min.   : 1.00   Length:1048575     Length:1048575    
 1st Qu.: 414.0   1st Qu.:14.00   Class :character   Class :character  
 Median : 525.0   Median :31.00   Mode  :character   Mode  :character  
 Mean   : 530.8   Mean   :30.19                                        
 3rd Qu.: 647.0   3rd Qu.:46.00                                        
 Max.   :8182.0   Max.   :66.00

data_PIC <- mutate(data_PIC, 

                   ENTRY_DATE = as_date(mdy_hm(ENTRY_TIME, tz = "UTC")),

                   ENTRY = mdy_hm(ENTRY_TIME, tz = "UTC"),
                   
                   
                   EXIT_DATE = as_date(mdy_hm(EXIT_TIME, tz = "UTC")),

                   EXIT = mdy_hm(EXIT_TIME, tz = "UTC"), 
                   
                   START_DAY = format(as.Date(START_DAY, format = "%d-%b-%y"), "%d%m%y"),

                   OFFTEST_DAY = format(as.Date(END_DAY, format = "%d-%b-%y"), "%d%m%y"),
                   

                   )

summary(data_PIC$PEN)

   Length     Class      Mode 
  1048575 character character

dim(data_PIC)

[1] 1048575      21

class(data_PIC)

[1] "data.frame"

data_PIC$PEN <- as.factor(data_PIC$PEN)

data_PIC$Social_Group <- paste(data_PIC$PEN, data_PIC$START_DAY, data_PIC$OFFTEST_DAY, sep = "")

head(data_PIC$Social_Group)

[1] "B0315181023181223" "B0315181023181223" "B0315181023181223"
[4] "B0315181023181223" "B0315181023181223" "B0315181023181223"

data_PIC <- group_by(data_PIC, Social_Group)

Raw data arrange

data_PIC.arrange <- arrange(data_PIC, Social_Group, ENTRY, EXIT, by_group = TRUE)%>%
  mutate(line = row_number())


data_PIC.arrange <- data_PIC.arrange %>%
  dplyr::select(ID, ENTRY, EXIT, Social_Group)

head(data_PIC.arrange)

# A tibble: 6 × 4
# Groups:   Social_Group [1]
        ID ENTRY               EXIT                Social_Group     
     <int> <dttm>              <dttm>              <chr>            
1 98782150 2023-05-24 05:40:00 2023-05-24 05:40:00 B0102240523240723
2 98782154 2023-05-24 05:48:00 2023-05-24 05:49:00 B0102240523240723
3 98782330 2023-05-24 05:58:00 2023-05-24 06:00:00 B0102240523240723
4 98782150 2023-05-24 06:03:00 2023-05-24 06:03:00 B0102240523240723
5 98753116 2023-05-24 06:05:00 2023-05-24 06:07:00 B0102240523240723
6 98782152 2023-05-24 07:13:00 2023-05-24 07:26:00 B0102240523240723

dim(data_PIC.arrange)

[1] 1048575       4

Creating variable time between feeders and separating records

You can add options to executable code like this

data_PIC <- data_PIC %>%
  arrange(Social_Group, ENTRY) %>%
  group_by(Social_Group) %>%
  mutate(Follower_ID = lead(ID),
         Follower_Time = lead(ENTRY),
         Follower_Social_Group = lead(Social_Group),
         line= row_number(),
         Hour_ENTRY = hour(ENTRY),
         time_between= as.numeric(Follower_Time - EXIT, unit="secs"))%>%
  filter(time_between < 36000,time_between>=0)

60s immediate

#60immediate 
data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues60 <- filter (data_PIC, time_between <= 60) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues60)

[1] 707223     29

summary(data_PIC_pvalues60$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      4     556    1170    1266    1822   11206

head(data_PIC_pvalues60)

# A tibble: 6 × 29
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782187    65       93704219      94286020      79385516 B0102       774
2 98793248    65       93679672      91289790      79408788 B0102       774
3 98782187    65       93704219      94286020      79385516 B0102       774
4 98793431    65       93543632      91128767      79408787 B0102       774
5 98782150    65       93543534      94380138      79399025 B0102       774
6 98782150    65       93543534      94380138      79399025 B0102       774
# ℹ 22 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>

dim(data_PIC_pvalues60)

[1] 707223     29

60s distant

#60  distant 

data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues60_distant <- filter (data_PIC, time_between > 60) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues60_distant)

[1] 331674     29

summary(data_PIC_pvalues60_distant$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2     458     982    1122    1611    9276

head(data_PIC_pvalues60_distant)

# A tibble: 6 × 29
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98782330    65       93561960      92841243      79408790 B0102       774
4 98782150    65       93543534      94380138      79399025 B0102       774
5 98753116    65       93513866      93085200      79399020 B0102       774
6 98782152    65       93543534      94380138      79399025 B0102       774
# ℹ 22 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>

dim(data_PIC_pvalues60_distant)

[1] 331674     29

120s distant

#120 

data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues120_distant <- filter (data_PIC, time_between > 120) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues120_distant)

[1] 282428     29

summary(data_PIC_pvalues120_distant$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2     458     975    1116    1598    9276

head(data_PIC_pvalues120_distant)

# A tibble: 6 × 29
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98782330    65       93561960      92841243      79408790 B0102       774
4 98753116    65       93513866      93085200      79399020 B0102       774
5 98782188    65       93704219      94286020      79385516 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 22 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>

dim(data_PIC_pvalues120_distant)

[1] 282428     29

180s distant

#180

data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues180_distant <- filter (data_PIC, time_between > 180) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues180_distant)

[1] 257203     29

summary(data_PIC_pvalues180_distant$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2     465     978    1119    1599    9276

head(data_PIC_pvalues180_distant)

# A tibble: 6 × 29
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98753116    65       93513866      93085200      79399020 B0102       774
4 98782188    65       93704219      94286020      79385516 B0102       774
5 98793248    65       93679672      91289790      79408788 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 22 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>

dim(data_PIC_pvalues180_distant)

[1] 257203     29

240s distant

#240

data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues240_distant <- filter (data_PIC, time_between > 240) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues240_distant)

[1] 239488     29

summary(data_PIC_pvalues240_distant$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2     470     980    1121    1599    8592

head(data_PIC_pvalues240_distant)

# A tibble: 6 × 29
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98753116    65       93513866      93085200      79399020 B0102       774
4 98782188    65       93704219      94286020      79385516 B0102       774
5 98793248    65       93679672      91289790      79408788 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 22 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>

dim(data_PIC_pvalues240_distant)

[1] 239488     29

300s distant

#300

data_PIC%>%
  mutate(time_between= as.numeric(Follower_Time - ENTRY, unit="secs"),
         lapse_Time = seconds(Follower_Time - ENTRY))%>%
  dplyr::select(time_between, lapse_Time)

Adding missing grouping variables: `Social_Group`

# A tibble: 1,038,897 × 3
# Groups:   Social_Group [309]
   Social_Group      time_between lapse_Time
   <chr>                    <dbl> <Period>  
 1 B0102240523240723          480 480S      
 2 B0102240523240723          600 600S      
 3 B0102240523240723          300 300S      
 4 B0102240523240723          120 120S      
 5 B0102240523240723         4080 4080S     
 6 B0102240523240723          900 900S      
 7 B0102240523240723         2280 2280S     
 8 B0102240523240723          240 240S      
 9 B0102240523240723         1020 1020S     
10 B0102240523240723          300 300S      
# ℹ 1,038,887 more rows

data_PIC$time_between <- as.numeric(data_PIC$time_between)

data_PIC_pvalues300_distant <- filter (data_PIC, time_between > 300) %>%
  mutate(TIME_FEEDER = as.numeric(STAY_IN))
dim(data_PIC_pvalues300_distant)

[1] 224191     29

summary(data_PIC_pvalues300_distant$TIME_FEEDER)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2     473     980    1121    1598    8592

Number of records per threshold

dim(data_PIC_pvalues60)

[1] 707223     29

dim(data_PIC_pvalues60_distant)

[1] 331674     29

dim(data_PIC_pvalues120_distant)

[1] 282428     29

dim(data_PIC_pvalues180_distant)

[1] 257203     29

dim(data_PIC_pvalues240_distant)

[1] 239488     29

dim(data_PIC_pvalues300_distant)

[1] 224191     29

Converting to Logarithm of time

data_PIC_pvalues60 <- data_PIC_pvalues60 %>%
  mutate(L_time = log(TIME_FEEDER))

data_PIC_pvalues60_distant <- data_PIC_pvalues60_distant %>%
  mutate(L_time = log(TIME_FEEDER))

data_PIC_pvalues120_distant <- data_PIC_pvalues120_distant %>%
  mutate(L_time = log(TIME_FEEDER))

data_PIC_pvalues180_distant <- data_PIC_pvalues180_distant %>%
  mutate(L_time = log(TIME_FEEDER))

data_PIC_pvalues240_distant <- data_PIC_pvalues240_distant %>%
  mutate(L_time = log(TIME_FEEDER))

data_PIC_pvalues300_distant <- data_PIC_pvalues300_distant %>%
  mutate(L_time = log(TIME_FEEDER))

dim(data_PIC_pvalues60)

[1] 707223     30

dim(data_PIC_pvalues60_distant)

[1] 331674     30

dim(data_PIC_pvalues120_distant)

[1] 282428     30

dim(data_PIC_pvalues180_distant)

[1] 257203     30

dim(data_PIC_pvalues240_distant)

[1] 239488     30

dim(data_PIC_pvalues300_distant)

[1] 224191     30

data_PIC_pvalues60

# A tibble: 707,223 × 30
# Groups:   Social_Group [308]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782187    65       93704219      94286020      79385516 B0102       774
 2 98793248    65       93679672      91289790      79408788 B0102       774
 3 98782187    65       93704219      94286020      79385516 B0102       774
 4 98793431    65       93543632      91128767      79408787 B0102       774
 5 98782150    65       93543534      94380138      79399025 B0102       774
 6 98782150    65       93543534      94380138      79399025 B0102       774
 7 98782187    65       93704219      94286020      79385516 B0102       774
 8 98791463    65       93679672      94402421      79408797 B0102       774
 9 98782154    65       93543534      94380138      79399025 B0102       774
10 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 707,213 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_pvalues60_distant

# A tibble: 331,674 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98782330    65       93561960      92841243      79408790 B0102       774
 4 98782150    65       93543534      94380138      79399025 B0102       774
 5 98753116    65       93513866      93085200      79399020 B0102       774
 6 98782152    65       93543534      94380138      79399025 B0102       774
 7 98782188    65       93704219      94286020      79385516 B0102       774
 8 98791463    65       93679672      94402421      79408797 B0102       774
 9 98782150    65       93543534      94380138      79399025 B0102       774
10 98793248    65       93679672      91289790      79408788 B0102       774
# ℹ 331,664 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_pvalues120_distant

# A tibble: 282,428 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98782330    65       93561960      92841243      79408790 B0102       774
 4 98753116    65       93513866      93085200      79399020 B0102       774
 5 98782188    65       93704219      94286020      79385516 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98793248    65       93679672      91289790      79408788 B0102       774
 8 98793431    65       93543632      91128767      79408787 B0102       774
 9 98791463    65       93679672      94402421      79408797 B0102       774
10 98782154    65       93543534      94380138      79399025 B0102       774
# ℹ 282,418 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_pvalues180_distant

# A tibble: 257,203 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98793248    65       93679672      91289790      79408788 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98782154    65       93543534      94380138      79399025 B0102       774
 8 98793248    65       93679672      91289790      79408788 B0102       774
 9 98782188    65       93704219      94286020      79385516 B0102       774
10 98753116    65       93513866      93085200      79399020 B0102       774
# ℹ 257,193 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_pvalues240_distant

# A tibble: 239,488 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98793248    65       93679672      91289790      79408788 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98782154    65       93543534      94380138      79399025 B0102       774
 8 98793248    65       93679672      91289790      79408788 B0102       774
 9 98782188    65       93704219      94286020      79385516 B0102       774
10 98753116    65       93513866      93085200      79399020 B0102       774
# ℹ 239,478 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_pvalues300_distant

# A tibble: 224,191 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98791463    65       93679672      94402421      79408797 B0102       774
 6 98782154    65       93543534      94380138      79399025 B0102       774
 7 98793248    65       93679672      91289790      79408788 B0102       774
 8 98782188    65       93704219      94286020      79385516 B0102       774
 9 98753116    65       93513866      93085200      79399020 B0102       774
10 98782150    65       93543534      94380138      79399025 B0102       774
# ℹ 224,181 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

Checking if immediate or distant

umbral <- 60

total_counts1 <- data_PIC_pvalues60 %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts1)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 immediate          707223

total_counts2 <- data_PIC_pvalues60_distant %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts2)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 distant            331674

total_counts3 <- data_PIC_pvalues120_distant %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts3)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 distant            282428

total_counts4 <- data_PIC_pvalues180_distant %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts4)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 distant            257203

total_counts5 <- data_PIC_pvalues240_distant %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts5)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 distant            239488

total_counts6 <- data_PIC_pvalues300_distant %>%
  mutate(time_between_group = case_when(
    time_between <= umbral ~ "immediate",
    time_between > umbral ~ "distant"
  )) %>%
  group_by(time_between_group) %>%
  summarise(total = n())  

print(total_counts6)

# A tibble: 1 × 2
  time_between_group  total
  <chr>               <int>
1 distant            224191

Checking social groups with filter 60inmediate

head(data_PIC_pvalues60)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782187    65       93704219      94286020      79385516 B0102       774
2 98793248    65       93679672      91289790      79408788 B0102       774
3 98782187    65       93704219      94286020      79385516 B0102       774
4 98793431    65       93543632      91128767      79408787 B0102       774
5 98782150    65       93543534      94380138      79399025 B0102       774
6 98782150    65       93543534      94380138      79399025 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues60)

[1] 707223     30

# Arrange the data and add a row number
data_PIC_arranged <- data_PIC_pvalues60 %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged)

[1] 707223     30

# Filter the data and select specific columns
data_PIC_filtered <- data_PIC_pvalues60 %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues60

# A tibble: 707,223 × 30
# Groups:   Social_Group [308]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782187    65       93704219      94286020      79385516 B0102       774
 2 98793248    65       93679672      91289790      79408788 B0102       774
 3 98782187    65       93704219      94286020      79385516 B0102       774
 4 98793431    65       93543632      91128767      79408787 B0102       774
 5 98782150    65       93543534      94380138      79399025 B0102       774
 6 98782150    65       93543534      94380138      79399025 B0102       774
 7 98782187    65       93704219      94286020      79385516 B0102       774
 8 98791463    65       93679672      94402421      79408797 B0102       774
 9 98782154    65       93543534      94380138      79399025 B0102       774
10 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 707,213 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered

# A tibble: 694,524 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782187    98791463 2023-05-24 08:36:00 2023-05-24 08:48:00     718
 2 98793248    98782188 2023-05-24 09:51:00 2023-05-24 09:51:00      13
 3 98782187    98793248 2023-05-24 11:33:00 2023-05-24 11:42:00     556
 4 98793431    98782154 2023-05-24 13:26:00 2023-05-24 13:50:00    1494
 5 98782150    98793249 2023-05-24 13:59:00 2023-05-24 14:03:00     252
 6 98782150    98782187 2023-05-24 14:07:00 2023-05-24 14:12:00     301
 7 98782187    98791460 2023-05-24 14:13:00 2023-05-24 14:36:00    1394
 8 98791463    98782154 2023-05-24 15:56:00 2023-05-24 16:24:00    1695
 9 98782154    98791463 2023-05-24 16:24:00 2023-05-24 16:24:00       7
10 98791463    98782154 2023-05-24 16:24:00 2023-05-24 16:25:00      15
# ℹ 694,514 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered)

[1] 694524     11

dim(data_PIC_pvalues60)

[1] 707223     30

#| warning: true
#| echo: true

unique_ids_per_group <- data_PIC_filtered %>%
  group_by(Social_Group) %>%
  summarise(Unique_IDs = n_distinct(ID),
            .groups = 'drop')  

print(unique_ids_per_group)

# A tibble: 306 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090519160719          2
 9 B0106090720140920         16
10 B0106130220130420         16
# ℹ 296 more rows

##no filte4r
unique_ids_per_group1 <- data_PIC_pvalues60 %>%
  group_by(Social_Group) %>%
  summarise(Unique_IDs = n_distinct(ID),
            .groups = 'drop')  

print(unique_ids_per_group1)

# A tibble: 308 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090519160719          2
 9 B0106090720140920         16
10 B0106130220130420         16
# ℹ 298 more rows

unique_ids_per_group <- data_PIC_filtered %>%
  group_by(Social_Group) %>%
  summarise(Unique_IDs = n_distinct(ID),
            .groups = 'drop') %>%
  # Calculate the mean of Unique_IDs across all groups
  summarise(Mean_Unique_IDs = mean(Unique_IDs))

# View the results
print(unique_ids_per_group)

# A tibble: 1 × 1
  Mean_Unique_IDs
            <dbl>
1            14.4

table(unique_ids_per_group1$Unique_IDs)


  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 
  2   3   1   1   3   2   3   6   1   3   3   6  19  39  93 123

table(unique_ids_per_group$Unique_IDs)

Warning: Unknown or uninitialised column: `Unique_IDs`.

< table of extent 0 >

data_PIC_filtered0 <- data_PIC_filtered %>%
  group_by(Social_Group) %>%
  summarise(Unique_Animal_Count = n_distinct(ID), 
            FEED_INTK_SUM = sum(FEED_INTK, na.rm = TRUE), .groups = 'drop')
###ggplot x vs y 
ggplot(data_PIC_filtered0, aes(x = Unique_Animal_Count, y = FEED_INTK_SUM)) +
  geom_point() +
  geom_smooth()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 16.07

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.07

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1.0962e-15

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
16.07

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
2.07

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 1.0962e-15

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 1

self_interaction_count <- data_PIC_pvalues60 %>%
  filter(ID == Follower_ID) %>%
  nrow()

self_interaction_count

[1] 12699

Checking social groups with filter 60distant

head(data_PIC_pvalues60_distant)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98782330    65       93561960      92841243      79408790 B0102       774
4 98782150    65       93543534      94380138      79399025 B0102       774
5 98753116    65       93513866      93085200      79399020 B0102       774
6 98782152    65       93543534      94380138      79399025 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues60_distant)

[1] 331674     30

# Arrange the data and add a row number
data_PIC_arranged1 <- data_PIC_pvalues60_distant %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged1)

[1] 331674     30

# Filter the data and select specific columns
data_PIC_filtered1 <- data_PIC_pvalues60_distant %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues60_distant

# A tibble: 331,674 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98782330    65       93561960      92841243      79408790 B0102       774
 4 98782150    65       93543534      94380138      79399025 B0102       774
 5 98753116    65       93513866      93085200      79399020 B0102       774
 6 98782152    65       93543534      94380138      79399025 B0102       774
 7 98782188    65       93704219      94286020      79385516 B0102       774
 8 98791463    65       93679672      94402421      79408797 B0102       774
 9 98782150    65       93543534      94380138      79399025 B0102       774
10 98793248    65       93679672      91289790      79408788 B0102       774
# ℹ 331,664 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered1

# A tibble: 303,477 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98782330    98782150 2023-05-24 05:58:00 2023-05-24 06:00:00     122
 4 98782150    98753116 2023-05-24 06:03:00 2023-05-24 06:03:00      24
 5 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 6 98782152    98782188 2023-05-24 07:13:00 2023-05-24 07:26:00     814
 7 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 8 98791463    98782150 2023-05-24 08:06:00 2023-05-24 08:07:00      53
 9 98782150    98793248 2023-05-24 08:10:00 2023-05-24 08:25:00     905
10 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
# ℹ 303,467 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered1)

[1] 303477     11

dim(data_PIC_pvalues60_distant)

[1] 331674     30

Checking social groups with filter 120distant

head(data_PIC_pvalues120_distant)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98782330    65       93561960      92841243      79408790 B0102       774
4 98753116    65       93513866      93085200      79399020 B0102       774
5 98782188    65       93704219      94286020      79385516 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues120_distant)

[1] 282428     30

# Arrange the data and add a row number
data_PIC_arranged2 <- data_PIC_pvalues120_distant %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged2)

[1] 282428     30

# Filter the data and select specific columns
data_PIC_filtered2 <- data_PIC_pvalues120_distant %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues120_distant

# A tibble: 282,428 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98782330    65       93561960      92841243      79408790 B0102       774
 4 98753116    65       93513866      93085200      79399020 B0102       774
 5 98782188    65       93704219      94286020      79385516 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98793248    65       93679672      91289790      79408788 B0102       774
 8 98793431    65       93543632      91128767      79408787 B0102       774
 9 98791463    65       93679672      94402421      79408797 B0102       774
10 98782154    65       93543534      94380138      79399025 B0102       774
# ℹ 282,418 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered2

# A tibble: 255,815 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98782330    98782150 2023-05-24 05:58:00 2023-05-24 06:00:00     122
 4 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 5 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 6 98791463    98782150 2023-05-24 08:06:00 2023-05-24 08:07:00      53
 7 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 8 98793431    98782187 2023-05-24 08:32:00 2023-05-24 08:33:00      19
 9 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
10 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
# ℹ 255,805 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered2)

[1] 255815     11

dim(data_PIC_pvalues120_distant)

[1] 282428     30

Checking social groups with filter 180distant

head(data_PIC_pvalues180_distant)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98753116    65       93513866      93085200      79399020 B0102       774
4 98782188    65       93704219      94286020      79385516 B0102       774
5 98793248    65       93679672      91289790      79408788 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues180_distant)

[1] 257203     30

# Arrange the data and add a row number
data_PIC_arranged3 <- data_PIC_pvalues180_distant %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged3)

[1] 257203     30

# Filter the data and select specific columns
data_PIC_filtered3 <- data_PIC_pvalues180_distant %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues180_distant

# A tibble: 257,203 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98793248    65       93679672      91289790      79408788 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98782154    65       93543534      94380138      79399025 B0102       774
 8 98793248    65       93679672      91289790      79408788 B0102       774
 9 98782188    65       93704219      94286020      79385516 B0102       774
10 98753116    65       93513866      93085200      79399020 B0102       774
# ℹ 257,193 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered3

# A tibble: 231,413 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 6 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 7 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 8 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 9 98791460    98782150 2023-05-24 14:36:00 2023-05-24 14:48:00     722
10 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
# ℹ 231,403 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered3)

[1] 231413     11

dim(data_PIC_pvalues180_distant)

[1] 257203     30

Checking social groups with filter 240distant

head(data_PIC_pvalues240_distant)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98753116    65       93513866      93085200      79399020 B0102       774
4 98782188    65       93704219      94286020      79385516 B0102       774
5 98793248    65       93679672      91289790      79408788 B0102       774
6 98791463    65       93679672      94402421      79408797 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues240_distant)

[1] 239488     30

# Arrange the data and add a row number
data_PIC_arranged4 <- data_PIC_pvalues240_distant %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged4)

[1] 239488     30

# Filter the data and select specific columns
data_PIC_filtered4 <- data_PIC_pvalues240_distant %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues240_distant

# A tibble: 239,488 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98793248    65       93679672      91289790      79408788 B0102       774
 6 98791463    65       93679672      94402421      79408797 B0102       774
 7 98782154    65       93543534      94380138      79399025 B0102       774
 8 98793248    65       93679672      91289790      79408788 B0102       774
 9 98782188    65       93704219      94286020      79385516 B0102       774
10 98753116    65       93513866      93085200      79399020 B0102       774
# ℹ 239,478 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered4

# A tibble: 214,224 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 6 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 7 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 8 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 9 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
10 98782188    98782187 2023-05-24 17:05:00 2023-05-24 17:05:00      11
# ℹ 214,214 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered4)

[1] 214224     11

dim(data_PIC_pvalues240_distant)

[1] 239488     30

Checking social groups with filter 300distant

head(data_PIC_pvalues300_distant)

# A tibble: 6 × 30
# Groups:   Social_Group [1]
        ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
     <int> <int>          <int>         <int>         <int> <fct>     <int>
1 98782150    65       93543534      94380138      79399025 B0102       774
2 98782154    65       93543534      94380138      79399025 B0102       774
3 98753116    65       93513866      93085200      79399020 B0102       774
4 98782188    65       93704219      94286020      79385516 B0102       774
5 98791463    65       93679672      94402421      79408797 B0102       774
6 98782154    65       93543534      94380138      79399025 B0102       774
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>,
#   time_between <dbl>, TIME_FEEDER <dbl>, L_time <dbl>

dim(data_PIC_pvalues300_distant)

[1] 224191     30

# Arrange the data and add a row number
data_PIC_arranged5 <- data_PIC_pvalues300_distant %>%
  arrange(Social_Group, ENTRY, EXIT, by_group = TRUE) %>%
  mutate(line = row_number())
dim(data_PIC_arranged5)

[1] 224191     30

# Filter the data and select specific columns
data_PIC_filtered5 <- data_PIC_pvalues300_distant %>%
  filter(ID != Follower_ID) %>%
  dplyr::select(ID, Follower_ID, ENTRY, EXIT, STAY_IN, Social_Group, L_time, Hour_ENTRY, time_between, FEED_INTK, PEN)


data_PIC_pvalues300_distant

# A tibble: 224,191 × 30
# Groups:   Social_Group [309]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782150    65       93543534      94380138      79399025 B0102       774
 2 98782154    65       93543534      94380138      79399025 B0102       774
 3 98753116    65       93513866      93085200      79399020 B0102       774
 4 98782188    65       93704219      94286020      79385516 B0102       774
 5 98791463    65       93679672      94402421      79408797 B0102       774
 6 98782154    65       93543534      94380138      79399025 B0102       774
 7 98793248    65       93679672      91289790      79408788 B0102       774
 8 98782188    65       93704219      94286020      79385516 B0102       774
 9 98753116    65       93513866      93085200      79399020 B0102       774
10 98782150    65       93543534      94380138      79399025 B0102       774
# ℹ 224,181 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

data_PIC_filtered5

# A tibble: 200,702 × 11
# Groups:   Social_Group [306]
         ID Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 6 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 7 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 8 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
 9 98782188    98782187 2023-05-24 17:05:00 2023-05-24 17:05:00      11
10 98782187    98791460 2023-05-24 19:09:00 2023-05-24 19:13:00     275
# ℹ 200,692 more rows
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

dim(data_PIC_filtered5)

[1] 200702     11

dim(data_PIC_pvalues300_distant)

[1] 224191     30

Creating column for PEs

#| warning: true
#| echo: true

# 60 inmediate
data_PIC_filtered <- data_PIC_filtered %>%
  rename(Animal = ID)

dim(data_PIC_filtered)

[1] 694524     11

head(data_PIC_filtered)

# A tibble: 6 × 11
# Groups:   Social_Group [1]
    Animal Follower_ID ENTRY               EXIT                STAY_IN
     <int>       <int> <dttm>              <dttm>                <int>
1 98782187    98791463 2023-05-24 08:36:00 2023-05-24 08:48:00     718
2 98793248    98782188 2023-05-24 09:51:00 2023-05-24 09:51:00      13
3 98782187    98793248 2023-05-24 11:33:00 2023-05-24 11:42:00     556
4 98793431    98782154 2023-05-24 13:26:00 2023-05-24 13:50:00    1494
5 98782150    98793249 2023-05-24 13:59:00 2023-05-24 14:03:00     252
6 98782150    98782187 2023-05-24 14:07:00 2023-05-24 14:12:00     301
# ℹ 6 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>

data_PIC_filtered <- within(data_PIC_filtered, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})

dim(data_PIC_filtered)

[1] 694524     14

data_PIC_filtered

# A tibble: 694,524 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782187    98791463 2023-05-24 08:36:00 2023-05-24 08:48:00     718
 2 98793248    98782188 2023-05-24 09:51:00 2023-05-24 09:51:00      13
 3 98782187    98793248 2023-05-24 11:33:00 2023-05-24 11:42:00     556
 4 98793431    98782154 2023-05-24 13:26:00 2023-05-24 13:50:00    1494
 5 98782150    98793249 2023-05-24 13:59:00 2023-05-24 14:03:00     252
 6 98782150    98782187 2023-05-24 14:07:00 2023-05-24 14:12:00     301
 7 98782187    98791460 2023-05-24 14:13:00 2023-05-24 14:36:00    1394
 8 98791463    98782154 2023-05-24 15:56:00 2023-05-24 16:24:00    1695
 9 98782154    98791463 2023-05-24 16:24:00 2023-05-24 16:24:00       7
10 98791463    98782154 2023-05-24 16:24:00 2023-05-24 16:25:00      15
# ℹ 694,514 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered <- data_PIC_filtered[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered1)

 [1] "ID"           "Follower_ID"  "ENTRY"        "EXIT"         "STAY_IN"     
 [6] "Social_Group" "L_time"       "Hour_ENTRY"   "time_between" "FEED_INTK"   
[11] "PEN"

##60distant
data_PIC_filtered1 <- data_PIC_filtered1 %>%
  rename(Animal = ID)

data_PIC_filtered1 <- within(data_PIC_filtered1, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})

dim(data_PIC_filtered1)

[1] 303477     14

data_PIC_filtered1

# A tibble: 303,477 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98782330    98782150 2023-05-24 05:58:00 2023-05-24 06:00:00     122
 4 98782150    98753116 2023-05-24 06:03:00 2023-05-24 06:03:00      24
 5 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 6 98782152    98782188 2023-05-24 07:13:00 2023-05-24 07:26:00     814
 7 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 8 98791463    98782150 2023-05-24 08:06:00 2023-05-24 08:07:00      53
 9 98782150    98793248 2023-05-24 08:10:00 2023-05-24 08:25:00     905
10 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
# ℹ 303,467 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered1)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered1 <- data_PIC_filtered1[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered1)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

#120distant
data_PIC_filtered2 <- data_PIC_filtered2 %>%
  rename(Animal = ID)

data_PIC_filtered2 <- within(data_PIC_filtered2, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})


dim(data_PIC_filtered2)

[1] 255815     14

data_PIC_filtered2

# A tibble: 255,815 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98782330    98782150 2023-05-24 05:58:00 2023-05-24 06:00:00     122
 4 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 5 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 6 98791463    98782150 2023-05-24 08:06:00 2023-05-24 08:07:00      53
 7 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 8 98793431    98782187 2023-05-24 08:32:00 2023-05-24 08:33:00      19
 9 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
10 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
# ℹ 255,805 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered2)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered2 <- data_PIC_filtered2[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered2)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

#180distant
data_PIC_filtered3 <- data_PIC_filtered3 %>%
  rename(Animal = ID)

data_PIC_filtered3 <- within(data_PIC_filtered3, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})

dim(data_PIC_filtered3)

[1] 231413     14

data_PIC_filtered3

# A tibble: 231,413 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 6 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 7 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 8 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 9 98791460    98782150 2023-05-24 14:36:00 2023-05-24 14:48:00     722
10 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
# ℹ 231,403 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered3)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered3 <- data_PIC_filtered3[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered3)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

#240distant
data_PIC_filtered4 <- data_PIC_filtered4 %>%
  rename(Animal = ID)

data_PIC_filtered4 <- within(data_PIC_filtered4, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})


dim(data_PIC_filtered4)

[1] 214224     14

data_PIC_filtered4

# A tibble: 214,224 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98793248    98793431 2023-05-24 08:27:00 2023-05-24 08:27:00      25
 6 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 7 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 8 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 9 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
10 98782188    98782187 2023-05-24 17:05:00 2023-05-24 17:05:00      11
# ℹ 214,214 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered4)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered4 <- data_PIC_filtered4[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered4)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

#300distant

data_PIC_filtered5 <- data_PIC_filtered5 %>%
  rename(Animal = ID)

data_PIC_filtered5 <- within(data_PIC_filtered5, {
  Animalpe <- Animal
  Follower_IDpe <- Follower_ID
  sdL_time <- L_time / sd(L_time)
})

dim(data_PIC_filtered5)

[1] 200702     14

data_PIC_filtered5

# A tibble: 200,702 × 14
# Groups:   Social_Group [306]
     Animal Follower_ID ENTRY               EXIT                STAY_IN
      <int>       <int> <dttm>              <dttm>                <int>
 1 98782150    98782154 2023-05-24 05:40:00 2023-05-24 05:40:00       7
 2 98782154    98782330 2023-05-24 05:48:00 2023-05-24 05:49:00      45
 3 98753116    98782152 2023-05-24 06:05:00 2023-05-24 06:07:00     131
 4 98782188    98791463 2023-05-24 07:28:00 2023-05-24 07:38:00     552
 5 98791463    98782154 2023-05-24 08:49:00 2023-05-24 08:49:00       8
 6 98782188    98753116 2023-05-24 09:52:00 2023-05-24 10:03:00     632
 7 98753116    98791460 2023-05-24 10:11:00 2023-05-24 10:24:00     778
 8 98782150    98791463 2023-05-24 14:52:00 2023-05-24 15:10:00    1096
 9 98782188    98782187 2023-05-24 17:05:00 2023-05-24 17:05:00      11
10 98782187    98791460 2023-05-24 19:09:00 2023-05-24 19:13:00     275
# ℹ 200,692 more rows
# ℹ 9 more variables: Social_Group <chr>, L_time <dbl>, Hour_ENTRY <int>,
#   time_between <dbl>, FEED_INTK <int>, PEN <fct>, sdL_time <dbl>,
#   Follower_IDpe <int>, Animalpe <int>

colnames(data_PIC_filtered5)

 [1] "Animal"        "Follower_ID"   "ENTRY"         "EXIT"         
 [5] "STAY_IN"       "Social_Group"  "L_time"        "Hour_ENTRY"   
 [9] "time_between"  "FEED_INTK"     "PEN"           "sdL_time"     
[13] "Follower_IDpe" "Animalpe"

# Changing order of columns
data_PIC_filtered5 <- data_PIC_filtered5[c("Animal", "Animalpe", "Social_Group", "Follower_ID", "Follower_IDpe", "Hour_ENTRY", "FEED_INTK", "L_time")]
colnames(data_PIC_filtered5)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

Filtering for social group number from 14-16 animals 60s immediate

#| warning: true
#| echo: true

social_group_summary <- data_PIC_filtered %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data <- data_PIC_filtered %>%
  left_join(social_group_summary, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data <- filter(extended_data, Unique_Animal_Count > 13)

final_summary <- filtered_data %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723   7295401
 2 B0104090519160719  10824100
 3 B0104180719250919   9018009
 4 B0104201218260219   6007401
 5 B0104240523240723   4477456
 6 B0104240621300821   3367225
 7 B0104280219070519   6100900
 8 B0106090720140920   7371225
 9 B0106130220130420   7441984
10 B0106170920161120   4558225
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782187 98782187 B01022405232…    98791463      98791463          8       188
2 98793248 98793248 B01022405232…    98782188      98782188          9         2
3 98782187 98782187 B01022405232…    98793248      98793248         11        15
4 98793431 98793431 B01022405232…    98782154      98782154         13       358
5 98782150 98782150 B01022405232…    98793249      98793249         13        35
6 98782150 98782150 B01022405232…    98782187      98782187         14        62
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723   7295401
 2 B0104090519160719  10824100
 3 B0104180719250919   9018009
 4 B0104201218260219   6007401
 5 B0104240523240723   4477456
 6 B0104240621300821   3367225
 7 B0104280219070519   6100900
 8 B0106090720140920   7371225
 9 B0106130220130420   7441984
10 B0106170920161120   4558225
# ℹ 245 more rows

dim(filtered_data)

[1] 631867     10

filtered_data

# A tibble: 631,867 × 10
# Groups:   Social_Group [255]
     Animal Animalpe Social_Group Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
      <int>    <int> <chr>              <int>         <int>      <int>     <int>
 1 98782187 98782187 B0102240523…    98791463      98791463          8       188
 2 98793248 98793248 B0102240523…    98782188      98782188          9         2
 3 98782187 98782187 B0102240523…    98793248      98793248         11        15
 4 98793431 98793431 B0102240523…    98782154      98782154         13       358
 5 98782150 98782150 B0102240523…    98793249      98793249         13        35
 6 98782150 98782150 B0102240523…    98782187      98782187         14        62
 7 98782187 98782187 B0102240523…    98791460      98791460         14       344
 8 98791463 98791463 B0102240523…    98782154      98782154         15         3
 9 98782154 98782154 B0102240523…    98791463      98791463         16         0
10 98791463 98791463 B0102240523…    98782154      98782154         16       350
# ℹ 631,857 more rows
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

social_group_summary <- filtered_data %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency <- table(filtered_data$Unique_Animal_Count)

print(unique_ids_frequency)


    14     15     16 
 83401 228671 319795

ggplot(filtered_data, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Filtering for social group number from 14-16 animals 60s distant

social_group_summary1 <- data_PIC_filtered1 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data1 <- data_PIC_filtered1 %>%
  left_join(social_group_summary1, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data1 <- filter(extended_data1, Unique_Animal_Count > 13)

final_summary1 <- filtered_data1 %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary1

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    352836
 2 B0104090519160719   1787569
 3 B0104180719250919   3305124
 4 B0104201218260219   1623076
 5 B0104240523240723    731025
 6 B0104240621300821   1024144
 7 B0104280219070519   1646089
 8 B0106090720140920   1304164
 9 B0106130220130420    427716
10 B0106170920161120   2474329
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data1))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782150 98782150 B01022405232…    98782154      98782154          5         0
2 98782154 98782154 B01022405232…    98782330      98782330          5        -2
3 98782330 98782330 B01022405232…    98782150      98782150          5        32
4 98782150 98782150 B01022405232…    98753116      98753116          6         0
5 98753116 98753116 B01022405232…    98782152      98782152          6        31
6 98782152 98782152 B01022405232…    98782188      98782188          7        94
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary1)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    352836
 2 B0104090519160719   1787569
 3 B0104180719250919   3305124
 4 B0104201218260219   1623076
 5 B0104240523240723    731025
 6 B0104240621300821   1024144
 7 B0104280219070519   1646089
 8 B0106090720140920   1304164
 9 B0106130220130420    427716
10 B0106170920161120   2474329
# ℹ 245 more rows

dim(filtered_data1)

[1] 247648     10

filtered_data1

# A tibble: 247,648 × 10
# Groups:   Social_Group [255]
     Animal Animalpe Social_Group Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
      <int>    <int> <chr>              <int>         <int>      <int>     <int>
 1 98782150 98782150 B0102240523…    98782154      98782154          5         0
 2 98782154 98782154 B0102240523…    98782330      98782330          5        -2
 3 98782330 98782330 B0102240523…    98782150      98782150          5        32
 4 98782150 98782150 B0102240523…    98753116      98753116          6         0
 5 98753116 98753116 B0102240523…    98782152      98782152          6        31
 6 98782152 98782152 B0102240523…    98782188      98782188          7        94
 7 98782188 98782188 B0102240523…    98791463      98791463          7       118
 8 98791463 98791463 B0102240523…    98782150      98782150          8         5
 9 98782150 98782150 B0102240523…    98793248      98793248          8       154
10 98793248 98793248 B0102240523…    98793431      98793431          8         3
# ℹ 247,638 more rows
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

social_group_summary1 <- filtered_data1 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary1)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency1 <- table(filtered_data1$Unique_Animal_Count)

print(unique_ids_frequency1)


    14     15     16 
 43861  94466 109321

ggplot(filtered_data1, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Filtering for social group number from 14-16 animals 120s distant

colnames(data_PIC_filtered2)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

social_group_summary2 <- data_PIC_filtered2 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data2 <- data_PIC_filtered2 %>%
  left_join(social_group_summary2, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data2 <- filter(extended_data2, Unique_Animal_Count > 13)

final_summary2 <- filtered_data2 %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary2

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    230400
 2 B0104090519160719   1194649
 3 B0104180719250919   2301289
 4 B0104201218260219   1203409
 5 B0104240523240723    452929
 6 B0104240621300821    781456
 7 B0104280219070519   1153476
 8 B0106090720140920    931225
 9 B0106130220130420    278784
10 B0106170920161120   1565001
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data2))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782150 98782150 B01022405232…    98782154      98782154          5         0
2 98782154 98782154 B01022405232…    98782330      98782330          5        -2
3 98782330 98782330 B01022405232…    98782150      98782150          5        32
4 98753116 98753116 B01022405232…    98782152      98782152          6        31
5 98782188 98782188 B01022405232…    98791463      98791463          7       118
6 98791463 98791463 B01022405232…    98782150      98782150          8         5
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary2)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    230400
 2 B0104090519160719   1194649
 3 B0104180719250919   2301289
 4 B0104201218260219   1203409
 5 B0104240523240723    452929
 6 B0104240621300821    781456
 7 B0104280219070519   1153476
 8 B0106090720140920    931225
 9 B0106130220130420    278784
10 B0106170920161120   1565001
# ℹ 245 more rows

dim(filtered_data2)

[1] 204769     10

colnames(filtered_data2)

 [1] "Animal"              "Animalpe"            "Social_Group"       
 [4] "Follower_ID"         "Follower_IDpe"       "Hour_ENTRY"         
 [7] "FEED_INTK"           "L_time"              "Unique_Animal_Count"
[10] "feeding_rec"

social_group_summary2 <- filtered_data2 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary2)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency2 <- table(filtered_data2$Unique_Animal_Count)

print(unique_ids_frequency2)


   14    15    16 
37722 79771 87276

ggplot(filtered_data2, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Filtering for social group number from 14-16 animals 180s distant

social_group_summary3 <- data_PIC_filtered3 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data3 <- data_PIC_filtered3 %>%
  left_join(social_group_summary3, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data3 <- filter(extended_data3, Unique_Animal_Count > 13)

final_summary3 <- filtered_data3 %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary3

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    178929
 2 B0104090519160719    929296
 3 B0104180719250919   1819801
 4 B0104201218260219    980100
 5 B0104240523240723    346921
 6 B0104240621300821    662596
 7 B0104280219070519    923521
 8 B0106090720140920    753424
 9 B0106130220130420    210681
10 B0106170920161120   1117249
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data3))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782150 98782150 B01022405232…    98782154      98782154          5         0
2 98782154 98782154 B01022405232…    98782330      98782330          5        -2
3 98753116 98753116 B01022405232…    98782152      98782152          6        31
4 98782188 98782188 B01022405232…    98791463      98791463          7       118
5 98793248 98793248 B01022405232…    98793431      98793431          8         3
6 98791463 98791463 B01022405232…    98782154      98782154          8        -2
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary3)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    178929
 2 B0104090519160719    929296
 3 B0104180719250919   1819801
 4 B0104201218260219    980100
 5 B0104240523240723    346921
 6 B0104240621300821    662596
 7 B0104280219070519    923521
 8 B0106090720140920    753424
 9 B0106130220130420    210681
10 B0106170920161120   1117249
# ℹ 245 more rows

dim(filtered_data3)

[1] 183079     10

filtered_data3

# A tibble: 183,079 × 10
# Groups:   Social_Group [255]
     Animal Animalpe Social_Group Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
      <int>    <int> <chr>              <int>         <int>      <int>     <int>
 1 98782150 98782150 B0102240523…    98782154      98782154          5         0
 2 98782154 98782154 B0102240523…    98782330      98782330          5        -2
 3 98753116 98753116 B0102240523…    98782152      98782152          6        31
 4 98782188 98782188 B0102240523…    98791463      98791463          7       118
 5 98793248 98793248 B0102240523…    98793431      98793431          8         3
 6 98791463 98791463 B0102240523…    98782154      98782154          8        -2
 7 98782188 98782188 B0102240523…    98753116      98753116          9       111
 8 98753116 98753116 B0102240523…    98791460      98791460         10       152
 9 98791460 98791460 B0102240523…    98782150      98782150         14       175
10 98782150 98782150 B0102240523…    98791463      98791463         14       213
# ℹ 183,069 more rows
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

social_group_summary3 <- filtered_data3 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary3)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency3 <- table(filtered_data3$Unique_Animal_Count)

print(unique_ids_frequency3)


   14    15    16 
34478 71543 77058

ggplot(filtered_data3, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Filtering for social group number from 14-16 animals 240s distant

social_group_summary4 <- data_PIC_filtered4 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data4 <- data_PIC_filtered4 %>%
  left_join(social_group_summary4, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data4 <- filter(extended_data4, Unique_Animal_Count > 13)

final_summary4 <- filtered_data4 %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary4

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    151321
 2 B0104090519160719    769129
 3 B0104180719250919   1520289
 4 B0104201218260219    844561
 5 B0104240523240723    293764
 6 B0104240621300821    576081
 7 B0104280219070519    784996
 8 B0106090720140920    624100
 9 B0106130220130420    164836
10 B0106170920161120    868624
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data4))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782150 98782150 B01022405232…    98782154      98782154          5         0
2 98782154 98782154 B01022405232…    98782330      98782330          5        -2
3 98753116 98753116 B01022405232…    98782152      98782152          6        31
4 98782188 98782188 B01022405232…    98791463      98791463          7       118
5 98793248 98793248 B01022405232…    98793431      98793431          8         3
6 98791463 98791463 B01022405232…    98782154      98782154          8        -2
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary4)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    151321
 2 B0104090519160719    769129
 3 B0104180719250919   1520289
 4 B0104201218260219    844561
 5 B0104240523240723    293764
 6 B0104240621300821    576081
 7 B0104280219070519    784996
 8 B0106090720140920    624100
 9 B0106130220130420    164836
10 B0106170920161120    868624
# ℹ 245 more rows

dim(filtered_data4)

[1] 167922     10

filtered_data4

# A tibble: 167,922 × 10
# Groups:   Social_Group [255]
     Animal Animalpe Social_Group Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
      <int>    <int> <chr>              <int>         <int>      <int>     <int>
 1 98782150 98782150 B0102240523…    98782154      98782154          5         0
 2 98782154 98782154 B0102240523…    98782330      98782330          5        -2
 3 98753116 98753116 B0102240523…    98782152      98782152          6        31
 4 98782188 98782188 B0102240523…    98791463      98791463          7       118
 5 98793248 98793248 B0102240523…    98793431      98793431          8         3
 6 98791463 98791463 B0102240523…    98782154      98782154          8        -2
 7 98782188 98782188 B0102240523…    98753116      98753116          9       111
 8 98753116 98753116 B0102240523…    98791460      98791460         10       152
 9 98782150 98782150 B0102240523…    98791463      98791463         14       213
10 98782188 98782188 B0102240523…    98782187      98782187         17         0
# ℹ 167,912 more rows
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

social_group_summary4 <- filtered_data4 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary4)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency4 <- table(filtered_data4$Unique_Animal_Count)

print(unique_ids_frequency4)


   14    15    16 
32190 65688 70044

ggplot(filtered_data4, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Filtering for social group number from 14-16 animals 300s distant

social_group_summary5 <- data_PIC_filtered5 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_Animal_Count = n_distinct(Animal),  # Count distinct IDs in each social group
    feeding_rec = n()                      # Count all records in each social group
  )

# joining this summary back to the original data to keep all columns
extended_data5 <- data_PIC_filtered5 %>%
  left_join(social_group_summary5, by = "Social_Group")

# Filter the data where the Unique_Animal_Count is greater than 13 to keep 14,15,16 SG
filtered_data5 <- filter(extended_data5, Unique_Animal_Count > 13)

final_summary5 <- filtered_data5 %>%
  summarize(Total_rec = sum(feeding_rec))

final_summary5

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    133225
 2 B0104090519160719    636804
 3 B0104180719250919   1306449
 4 B0104201218260219    741321
 5 B0104240523240723    228484
 6 B0104240621300821    528529
 7 B0104280219070519    670761
 8 B0106090720140920    555025
 9 B0106130220130420    142129
10 B0106170920161120    698896
# ℹ 245 more rows

# Display the filtered data with all original columns
print(head(filtered_data5))

# A tibble: 6 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 98782150 98782150 B01022405232…    98782154      98782154          5         0
2 98782154 98782154 B01022405232…    98782330      98782330          5        -2
3 98753116 98753116 B01022405232…    98782152      98782152          6        31
4 98782188 98782188 B01022405232…    98791463      98791463          7       118
5 98791463 98791463 B01022405232…    98782154      98782154          8        -2
6 98782188 98782188 B01022405232…    98753116      98753116          9       111
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

# total records after filtering
print(final_summary5)

# A tibble: 255 × 2
   Social_Group      Total_rec
   <chr>                 <int>
 1 B0102240523240723    133225
 2 B0104090519160719    636804
 3 B0104180719250919   1306449
 4 B0104201218260219    741321
 5 B0104240523240723    228484
 6 B0104240621300821    528529
 7 B0104280219070519    670761
 8 B0106090720140920    555025
 9 B0106130220130420    142129
10 B0106170920161120    698896
# ℹ 245 more rows

dim(filtered_data5)

[1] 156076     10

filtered_data5

# A tibble: 156,076 × 10
# Groups:   Social_Group [255]
     Animal Animalpe Social_Group Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
      <int>    <int> <chr>              <int>         <int>      <int>     <int>
 1 98782150 98782150 B0102240523…    98782154      98782154          5         0
 2 98782154 98782154 B0102240523…    98782330      98782330          5        -2
 3 98753116 98753116 B0102240523…    98782152      98782152          6        31
 4 98782188 98782188 B0102240523…    98791463      98791463          7       118
 5 98791463 98791463 B0102240523…    98782154      98782154          8        -2
 6 98782188 98782188 B0102240523…    98753116      98753116          9       111
 7 98753116 98753116 B0102240523…    98791460      98791460         10       152
 8 98782150 98782150 B0102240523…    98791463      98791463         14       213
 9 98782188 98782188 B0102240523…    98782187      98782187         17         0
10 98782187 98782187 B0102240523…    98791460      98791460         19       291
# ℹ 156,066 more rows
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

social_group_summary5 <- filtered_data5 %>%
  group_by(Social_Group) %>%
  summarise(
    Unique_IDs = n_distinct(Animal)  
  )

print(social_group_summary5)

# A tibble: 255 × 2
   Social_Group      Unique_IDs
   <chr>                  <int>
 1 B0102240523240723         16
 2 B0104090519160719         15
 3 B0104180719250919         15
 4 B0104201218260219         14
 5 B0104240523240723         16
 6 B0104240621300821         14
 7 B0104280219070519         15
 8 B0106090720140920         16
 9 B0106130220130420         16
10 B0106170920161120         16
# ℹ 245 more rows

unique_ids_frequency5 <- table(filtered_data5$Unique_Animal_Count)

print(unique_ids_frequency5)


   14    15    16 
30410 61180 64486

ggplot(filtered_data5, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Warning: Failed to fit group -1.
Caused by error in `smooth.construct.cr.smooth.spec()`:
! x has insufficient unique values to support 10 knots: reduce k.

Checking dimensions of filtered data for SGs

dim(filtered_data)

[1] 631867     10

dim(filtered_data1)

[1] 247648     10

dim(filtered_data2)

[1] 204769     10

dim(filtered_data3)

[1] 183079     10

dim(filtered_data4)

[1] 167922     10

dim(filtered_data5)

[1] 156076     10

Checking number of animals and records of filtered data for SGs

#60  inmediate
num_unique_animals <- n_distinct(filtered_data$Animal)
num_unique_social_groups <- n_distinct(filtered_data$Social_Group)

num_unique_animals

[1] 3909

num_unique_social_groups

[1] 255

num_unique_animals_follower <- n_distinct(filtered_data$Follower_ID)
num_unique_animals_follower

[1] 3909

#60 distant
num_unique_animals1 <- n_distinct(filtered_data1$Animal)
num_unique_social_groups1 <- n_distinct(filtered_data1$Social_Group)

num_unique_animals1

[1] 3910

num_unique_social_groups1

[1] 255

num_unique_animals1_follower <- n_distinct(filtered_data1$Follower_ID)
num_unique_animals1_follower

[1] 3910

#120 distant
num_unique_animals2 <- n_distinct(filtered_data2$Animal)
num_unique_social_groups2 <- n_distinct(filtered_data2$Social_Group)

num_unique_animals2

[1] 3909

num_unique_social_groups2

[1] 255

num_unique_animals2_follower <- n_distinct(filtered_data2$Follower_ID)
num_unique_animals2_follower

[1] 3910

#180 distant
num_unique_animals3 <- n_distinct(filtered_data3$Animal)
num_unique_social_groups3 <- n_distinct(filtered_data3$Social_Group)

num_unique_animals3

[1] 3909

num_unique_social_groups3

[1] 255

num_unique_animals3_follower <- n_distinct(filtered_data3$Follower_ID)
num_unique_animals3_follower

[1] 3910

#240 distant
num_unique_animals4 <- n_distinct(filtered_data4$Animal)
num_unique_social_groups4 <- n_distinct(filtered_data4$Social_Group)

num_unique_animals4

[1] 3909

num_unique_social_groups4

[1] 255

num_unique_animals4_follower <- n_distinct(filtered_data4$Follower_ID)
num_unique_animals4_follower

[1] 3910

#300 distant
num_unique_animals5 <- n_distinct(filtered_data5$Animal)
num_unique_social_groups5 <- n_distinct(filtered_data5$Social_Group)

num_unique_animals5

[1] 3909

num_unique_social_groups5

[1] 255

num_unique_animals5_follower <- n_distinct(filtered_data5$Follower_ID)
num_unique_animals5_follower

[1] 3910

missing_animal_id <- setdiff(filtered_data5$Follower_ID, filtered_data5$Animal)

# Output the missing Follower_ID
missing_animal_id

[1] 93432951

missing_records <- filtered_data5[filtered_data5$Follower_ID %in% missing_animal_id, ]

# View these records to understand more about the context
missing_records

# A tibble: 1 × 10
# Groups:   Social_Group [1]
    Animal Animalpe Social_Group  Follower_ID Follower_IDpe Hour_ENTRY FEED_INTK
     <int>    <int> <chr>               <int>         <int>      <int>     <int>
1 93451159 93451159 B09061703222…    93432951      93432951         12       219
# ℹ 3 more variables: L_time <dbl>, Unique_Animal_Count <int>,
#   feeding_rec <int>

library(dplyr)


filtered_data <- filtered_data[, !colnames(filtered_data) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

filtered_data1 <- filtered_data1[, !colnames(filtered_data1) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data1)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

filtered_data2 <- filtered_data2[, !colnames(filtered_data2) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data2)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

filtered_data3 <- filtered_data3[, !colnames(filtered_data3) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data3)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

filtered_data4 <- filtered_data4[, !colnames(filtered_data4) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data4)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

filtered_data5 <- filtered_data5[, !colnames(filtered_data5) %in% c("Unique_Animal_Count", "feeding_rec")]
colnames(filtered_data5)

[1] "Animal"        "Animalpe"      "Social_Group"  "Follower_ID"  
[5] "Follower_IDpe" "Hour_ENTRY"    "FEED_INTK"     "L_time"

Creating CSV Files

write.csv(filtered_data, "data_PIC_SGfilter_60s_inmediate.csv", row.names = FALSE)

write.csv(filtered_data1, "data_PIC_SGfilter_60s_distant.csv", row.names = FALSE)

write.csv(filtered_data2, "data_PIC_SGfilter_120s_distant.csv", row.names = FALSE)

write.csv(filtered_data3, "data_PIC_SGfilter_180s_distant.csv", row.names = FALSE)

write.csv(filtered_data4, "data_PIC_SGfilter_240s_distant.csv", row.names = FALSE)

write.csv(filtered_data5, "data_PIC_SGfilter_300s_distant.csv", row.names = FALSE)

Social Group frequency & hist

#this is data filtered for follower follows itself, we dont want this data because it does not make sense calculate the social genetic effect of the animal itself. there is no social genetic effect. BUT THIS DATA STILL HAS THE SG THAT ARE LESS THAN 14. 

social_group_data_arr <- data_PIC_filtered %>%
  group_by(Social_Group) %>%
  summarise(Unique_Animal_Count = n_distinct(Animal),
            feeding_rec = n())

social_group_data_arr

# A tibble: 306 × 3
   Social_Group      Unique_Animal_Count feeding_rec
   <chr>                           <int>       <int>
 1 B0102240523240723                  16        2701
 2 B0104090519160719                  15        3290
 3 B0104180719250919                  15        3003
 4 B0104201218260219                  14        2451
 5 B0104240523240723                  16        2116
 6 B0104240621300821                  14        1835
 7 B0104280219070519                  15        2470
 8 B0106090519160719                   2          51
 9 B0106090720140920                  16        2715
10 B0106130220130420                  16        2728
# ℹ 296 more rows

filter(social_group_data_arr, Unique_Animal_Count > 0) %>%  summarize(Total_rec = sum(feeding_rec))

# A tibble: 1 × 1
  Total_rec
      <int>
1    694524

dim(data_PIC_filtered)

[1] 694524      8

ggplot(social_group_data_arr, aes(x = feeding_rec)) +
  geom_histogram(aes(y = ..density..), binwidth = 100, fill = "skyblue", color = "black") +  # Adjust binwidth as necessary
  geom_density(alpha = .2, fill = "#FF6666") +  # Adds a density plot
  labs(title = "Histogram and Density Plot of Animal Count",
       x = "Animal Count",
       y = "Density") +
  theme_minimal()

Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.

# Add mean and median lines to the histogram number 
ggplot(social_group_data_arr, aes(x = feeding_rec)) +
  geom_histogram(aes(y = ..density..), binwidth = 100, fill = "skyblue", color = "black") +
  geom_density(alpha = .2, fill = "#FF6666") +
  geom_vline(aes(xintercept = mean(feeding_rec)), color = "green", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = median(feeding_rec)), color = "red", linetype = "dashed", size = 1) +
  labs(title = "Histogram and Density Plot feeding records per Social Group",
       x = "Feeding Records", y = "Density") +
  theme_minimal()

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

social_group_data_arr

# A tibble: 306 × 3
   Social_Group      Unique_Animal_Count feeding_rec
   <chr>                           <int>       <int>
 1 B0102240523240723                  16        2701
 2 B0104090519160719                  15        3290
 3 B0104180719250919                  15        3003
 4 B0104201218260219                  14        2451
 5 B0104240523240723                  16        2116
 6 B0104240621300821                  14        1835
 7 B0104280219070519                  15        2470
 8 B0106090519160719                   2          51
 9 B0106090720140920                  16        2715
10 B0106130220130420                  16        2728
# ℹ 296 more rows

mean_count <- mean(social_group_data_arr$feeding_rec)

mean_count

[1] 2269.686

###ggplot x vs y 
ggplot(social_group_data_arr, aes(x= Unique_Animal_Count , y = feeding_rec, )) +
  geom_point()+
  geom_smooth()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 16.07

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.07

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1.0962e-15

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
16.07

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
2.07

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 1.0962e-15

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 1

data_PIC_pvalues60 %>% group_by(Social_Group) %>% tally()

# A tibble: 308 × 2
   Social_Group          n
   <chr>             <int>
 1 B0102240523240723  2775
 2 B0104090519160719  3303
 3 B0104180719250919  3010
 4 B0104201218260219  2527
 5 B0104240523240723  2164
 6 B0104240621300821  1851
 7 B0104280219070519  2513
 8 B0106090519160719    60
 9 B0106090720140920  2725
10 B0106130220130420  2753
# ℹ 298 more rows

data_PIC_filtered %>% group_by(Social_Group) %>% tally()

# A tibble: 306 × 2
   Social_Group          n
   <chr>             <int>
 1 B0102240523240723  2701
 2 B0104090519160719  3290
 3 B0104180719250919  3003
 4 B0104201218260219  2451
 5 B0104240523240723  2116
 6 B0104240621300821  1835
 7 B0104280219070519  2470
 8 B0106090519160719    51
 9 B0106090720140920  2715
10 B0106130220130420  2728
# ℹ 296 more rows

filtered_out <- data_PIC_pvalues60 %>% filter(ID == Follower_ID)
filtered_out

# A tibble: 12,699 × 30
# Groups:   Social_Group [308]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 98782154    65       93543534      94380138      79399025 B0102       774
 2 98782152    65       93543534      94380138      79399025 B0102       774
 3 98782154    65       93543534      94380138      79399025 B0102       774
 4 98793249    65       93679672      91289790      79408788 B0102       774
 5 98782154    65       93543534      94380138      79399025 B0102       774
 6 98793249    65       93679672      91289790      79408788 B0102       774
 7 98782328    65       93561960      92841243      79408790 B0102       774
 8 98782187    65       93704219      94286020      79385516 B0102       774
 9 98782187    65       93704219      94286020      79385516 B0102       774
10 98791433    65       93704219      94542696      79418353 B0102       774
# ℹ 12,689 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

filtered_out %>% group_by(Social_Group) %>% tally()

# A tibble: 308 × 2
   Social_Group          n
   <chr>             <int>
 1 B0102240523240723    74
 2 B0104090519160719    13
 3 B0104180719250919     7
 4 B0104201218260219    76
 5 B0104240523240723    48
 6 B0104240621300821    16
 7 B0104280219070519    43
 8 B0106090519160719     9
 9 B0106090720140920    10
10 B0106130220130420    25
# ℹ 298 more rows

data_PIC_pvalues60 %>% group_by(Social_Group) %>% tally()

# A tibble: 308 × 2
   Social_Group          n
   <chr>             <int>
 1 B0102240523240723  2775
 2 B0104090519160719  3303
 3 B0104180719250919  3010
 4 B0104201218260219  2527
 5 B0104240523240723  2164
 6 B0104240621300821  1851
 7 B0104280219070519  2513
 8 B0106090519160719    60
 9 B0106090720140920  2725
10 B0106130220130420  2753
# ℹ 298 more rows

filtered_out %>% group_by(Social_Group) %>% tally()

# A tibble: 308 × 2
   Social_Group          n
   <chr>             <int>
 1 B0102240523240723    74
 2 B0104090519160719    13
 3 B0104180719250919     7
 4 B0104201218260219    76
 5 B0104240523240723    48
 6 B0104240621300821    16
 7 B0104280219070519    43
 8 B0106090519160719     9
 9 B0106090720140920    10
10 B0106130220130420    25
# ℹ 298 more rows

dim(data_PIC_filtered)

[1] 694524      8

SG_before <- unique(data_PIC_pvalues60$Social_Group)

# Apply the filter
data_PIC_filtered <- data_PIC_pvalues60 %>%
    filter(ID != Follower_ID)

# Get unique Social Groups after filtering
SG_after <- unique(data_PIC_filtered$Social_Group)

# Identify which SGs were removed
SG_removed <- setdiff(SG_before, SG_after)
SG_removed

[1] "B0106201218260219" "B0211160720210920"

# Find the records from the original dataset for the removed SGs
removed_records <- data_PIC_pvalues60 %>%
  filter(Social_Group %in% SG_removed)

# View the removed records (optional)
removed_records

# A tibble: 23 × 30
# Groups:   Social_Group [2]
         ID  LINE PED_IDENT_SIRE PED_IDENT_DAM LIT_LITTER_ID PEN   TEST_FARM
      <int> <int>          <int>         <int>         <int> <fct>     <int>
 1 81152553    65       74753324      77604389      70115908 B0106       774
 2 81152553    65       74753324      77604389      70115908 B0106       774
 3 81152553    65       74753324      77604389      70115908 B0106       774
 4 81152553    65       74753324      77604389      70115908 B0106       774
 5 81152553    65       74753324      77604389      70115908 B0106       774
 6 81152553    65       74753324      77604389      70115908 B0106       774
 7 81152553    65       74753324      77604389      70115908 B0106       774
 8 81152553    65       74753324      77604389      70115908 B0106       774
 9 81152553    65       74753324      77604389      70115908 B0106       774
10 81152553    65       74753324      77604389      70115908 B0106       774
# ℹ 13 more rows
# ℹ 23 more variables: ENTRY_TIME <chr>, EXIT_TIME <chr>, STAY_IN <int>,
#   FEED_INTK <int>, FEEDER_ENTRY_WT <int>, FEEDER_EXIT_WT <int>,
#   FEEDER_NO <int>, START_DAY <chr>, END_DAY <chr>, ENTRY_DATE <date>,
#   ENTRY <dttm>, EXIT_DATE <date>, EXIT <dttm>, OFFTEST_DAY <chr>,
#   Social_Group <chr>, Follower_ID <int>, Follower_Time <dttm>,
#   Follower_Social_Group <chr>, line <int>, Hour_ENTRY <int>, …

# Count how many records were removed
num_records_removed <- nrow(removed_records)
num_records_removed

[1] 23

# Count unique animals in the removed groups
num_animals_removed <- removed_records %>%
  distinct(ID) %>%
  nrow()

num_animals_removed

[1] 2

SG_removed <- setdiff(SG_before, SG_after)
SG_removed  # List of removed Social Groups

[1] "B0106201218260219" "B0211160720210920"

removed_records %>%
  group_by(ID) %>%
  tally()

# A tibble: 2 × 2
        ID     n
     <int> <int>
1 81152553    22
2 86156265     1

tally_before <- data_PIC_pvalues60 %>% group_by(Social_Group) %>% tally()
tally_after <- data_PIC_filtered %>% group_by(Social_Group) %>% tally()

tally_comparison <- tally_before %>%
  left_join(tally_after, by = "Social_Group", suffix = c("_before", "_after")) %>%
  mutate(records_lost = n_before - n_after)

tally_comparison

# A tibble: 308 × 4
   Social_Group      n_before n_after records_lost
   <chr>                <int>   <int>        <int>
 1 B0102240523240723     2775    2701           74
 2 B0104090519160719     3303    3290           13
 3 B0104180719250919     3010    3003            7
 4 B0104201218260219     2527    2451           76
 5 B0104240523240723     2164    2116           48
 6 B0104240621300821     1851    1835           16
 7 B0104280219070519     2513    2470           43
 8 B0106090519160719       60      51            9
 9 B0106090720140920     2725    2715           10
10 B0106130220130420     2753    2728           25
# ℹ 298 more rows

dim(data_PIC_filtered)

[1] 694524     30

dim(data_PIC_pvalues60)

[1] 707223     30

##3 animals and 3 social groups were removed.
#These animals likely had a large number of self-interactions, leading to a significant number of records being removed (~36k).
#Even though it’s just 3 animals/groups, the total number of interactions per animal may be high.

The echo: false option disables the printing of code (only output is displayed).