Full dataset analysis

Preliminaries

Load libraries

library(knitr)       #for R Markdown
library(MASS)        #for negative binomial glm (glm.nb)
library(lme4)        #for mixed models
library(emmeans)     #for posthoc
library(car)         #for Anova
library(survival)    #for survival analysis
library(coxme)
library(rptR)        #for repeatability analysis
library(MuMIn)       #for model selection (dredge)
library(ggfortify)   #for plotting survival analysis
library(ggsignif)    #for labeling significance in ggplots
library(GGally)     #for correlation matrix
library(tidyverse)   #for data processing, put last to avoid function masking

Data processing

Read dataset, correct variable types, and derive variables.

data_raw <- read.csv("data_personality_final_030126.csv",                 
                 # to avoid reading errors
                 fileEncoding="UTF-8-BOM", na.strings = c("","N/A","#VALUE!"))

data <- data_raw %>%
  #clean up some entries for easier analysis
  mutate(gosner_stage = ifelse(gosner_stage=="45/46", 46, as.integer(gosner_stage))) %>%
  mutate(tadpole_ID = factor(paste0("T", tadpole_ID), levels = paste0("T", sort(unique(tadpole_ID))))) %>%
  
  #correct variable types
  mutate_at(c("tadpole_ID","morph","water_y.n"), as.factor) %>%
  mutate_at(c("found_date","assay_date_OF","assay_date_shelter"),  lubridate::mdy) %>%
  mutate_at(c("invisible_time_hh_mm_ss"), lubridate::hms)  %>%
  mutate_at("exploration_rate_explored_div_by_total", as.numeric) %>%
  
  #derive variables
  mutate(condition = body_width_mm/length_without_tail_mm,
         age = assay_date_OF - found_date,
         age = if_else(age < 0, NA, age)) %>%
  
  #add trial sequence for each tadpole
  arrange(tadpole_ID, week, replicate) %>%
  group_by(tadpole_ID) %>%
  mutate(trial_seq = row_number()) %>%
  ungroup()

Sample sizes

data %>% select(tadpole_ID, week) %>% table

##           week
## tadpole_ID 1 2 3 4 5 6 7 8 9
##        T5  4 4 4 4 4 0 0 0 0
##        T6  4 4 4 4 4 0 0 0 0
##        T7  4 4 4 4 4 0 0 0 0
##        T8  4 4 4 4 4 4 4 4 4
##        T9  4 4 4 4 4 0 0 0 0
##        T10 4 4 4 4 4 0 0 0 0
##        T11 4 4 4 4 4 0 0 0 0
##        T12 4 4 4 4 4 0 0 0 0
##        T13 4 4 4 4 4 4 0 0 0
##        T14 4 4 4 4 4 0 0 0 0
##        T15 4 4 4 4 4 0 0 0 0
##        T16 4 4 4 4 4 0 0 0 0
##        T17 4 4 4 4 0 0 0 0 0
##        T18 4 4 4 4 0 0 0 0 0
##        T19 4 4 4 4 0 0 0 0 0
##        T20 4 4 4 4 0 0 0 0 0
##        T21 4 4 4 4 0 0 0 0 0
##        T22 4 4 4 4 0 0 0 0 0
##        T23 4 4 4 4 0 0 0 0 0
##        T24 4 4 4 4 0 0 0 0 0
##        T25 4 4 4 4 0 0 0 0 0
##        T26 4 4 4 4 0 0 0 0 0
##        T27 4 4 4 4 0 0 0 0 0
##        T28 4 4 4 4 0 0 0 0 0
##        T29 4 4 4 4 4 0 0 0 0
##        T30 4 4 4 4 4 0 0 0 0
##        T31 4 4 4 4 0 0 0 0 0
##        T32 4 4 4 4 4 0 0 0 0
##        T33 4 4 4 4 4 0 0 0 0
##        T34 4 4 4 4 0 0 0 0 0
##        T35 4 4 4 4 4 0 0 0 0

data %>% select(tadpole_ID, gosner_stage) %>% table

##           gosner_stage
## tadpole_ID 26 30 31 36 37 41 42 45 46
##        T5   3 16  0  0  0  0  0  0  0
##        T6   3  0  0  4  0  2  2  2  6
##        T7   4  4  0  0  0  4  4  0  4
##        T8   8 16  4  0  0  4  0  0  4
##        T9   4  4  0  0  0  4  4  0  4
##        T10  0  4  2  0  2  4  0  0  8
##        T11  0  8  2  0  0  2  4  0  4
##        T12  0  6  2  0  2  0  6  0  4
##        T13  0 24  0  0  0  0  0  0  0
##        T14  0  6  2  0  0  4  4  0  4
##        T15  4  4  0  0  0  4  4  0  4
##        T16  4  4  0  0  0  6  2  0  4
##        T17  4  2  0  0  0  4  0  0  4
##        T18  0  0  4  0  2  4  2  0  4
##        T19  0  0  4  0  2  2  4  0  4
##        T20  0  0  4  0  2  2  4  0  4
##        T21  0  0  6  0  2  2  2  0  4
##        T22  0  0  4  0  0  4  4  0  4
##        T23  0  0  4  0  0  4  4  0  4
##        T24  0  0  4  0  2  6  0  0  4
##        T25  0  0  4  0  4  4  0  0  4
##        T26  4  0  0  0  4  4  0  0  4
##        T27  0  0  4  0  2  2  4  0  4
##        T28  4  0  0  0  4  4  0  0  4
##        T29  4  0  0  0  4  4  4  0  4
##        T30  2  0  2  0  4  4  0  0  8
##        T31  0  0  4  0  0  4  4  0  4
##        T32  4  0  0  0  4  4  0  4  4
##        T33  4  0  4  0  0  4  4  0  4
##        T34  0  0  4  0  0  4  2  2  4
##        T35  4  0  0  0  4  4  4  0  4

Developmental rate

#set highlighter colors
tad_ids <- unique(data$tadpole_ID)
color_highlights <- setNames(rep("grey80", length(tad_ids)), tad_ids)

# Assign distinct colors to weirdo tadpoles
color_highlights ["T5"]  <- "#3B1C8C" 
color_highlights ["T8"]  <- "#56B4E9" 
color_highlights ["T13"] <- "#21908C"

ggplot(data %>% filter(), 
       aes(x = age, y = gosner_stage, group = tadpole_ID, color = tadpole_ID)) +
  geom_line(alpha = 0.9, size = 1) +
  labs(x = "Age (days)", y = "Gosner stage") +
  scale_color_manual(values = color_highlights, breaks = c("T5", "T8", "T13")) +
  theme_classic(base_size = 18)

T5, T8 & T13 are the weirdos that didn’t develop/develop slow, remove from all analysis. Removing G46 data as well, since they are dry in assays and hard to compare

data_filt <- data %>% 
  filter(!tadpole_ID %in% c("T5","T8","T13"), 
         water_y.n !="n",
         gosner_stage != 46) %>%
  droplevels()

sample sizes w/o outliers

data_filt %>% select(tadpole_ID, week) %>% table()

##           week
## tadpole_ID 1 2 3 4
##        T6  3 4 4 2
##        T7  4 4 4 4
##        T9  4 4 4 4
##        T10 4 4 4 0
##        T11 4 4 4 4
##        T12 4 4 4 4
##        T14 4 4 4 4
##        T15 4 4 4 4
##        T16 4 4 4 4
##        T17 4 2 4 0
##        T18 4 4 4 0
##        T19 4 4 0 0
##        T20 4 4 4 0
##        T21 4 4 4 0
##        T22 4 4 4 0
##        T23 4 4 4 0
##        T24 4 4 4 0
##        T25 4 4 4 0
##        T26 4 4 4 0
##        T27 4 4 4 0
##        T28 4 4 4 0
##        T29 4 4 4 4
##        T30 4 4 4 0
##        T31 4 4 4 0
##        T32 4 4 4 4
##        T33 4 4 4 4
##        T34 4 4 4 0
##        T35 4 4 4 4

data_filt %>% select(tadpole_ID, gosner_stage) %>% table()

##           gosner_stage
## tadpole_ID 26 30 31 36 37 41 42 45
##        T6   3  0  0  4  0  2  2  2
##        T7   4  4  0  0  0  4  4  0
##        T9   4  4  0  0  0  4  4  0
##        T10  0  4  2  0  2  4  0  0
##        T11  0  8  2  0  0  2  4  0
##        T12  0  6  2  0  2  0  6  0
##        T14  0  6  2  0  0  4  4  0
##        T15  4  4  0  0  0  4  4  0
##        T16  4  4  0  0  0  6  2  0
##        T17  4  2  0  0  0  4  0  0
##        T18  0  0  4  0  2  4  2  0
##        T19  0  0  4  0  2  2  0  0
##        T20  0  0  4  0  2  2  4  0
##        T21  0  0  6  0  2  2  2  0
##        T22  0  0  4  0  0  4  4  0
##        T23  0  0  4  0  0  4  4  0
##        T24  0  0  4  0  2  6  0  0
##        T25  0  0  4  0  4  4  0  0
##        T26  4  0  0  0  4  4  0  0
##        T27  0  0  4  0  2  2  4  0
##        T28  4  0  0  0  4  4  0  0
##        T29  4  0  0  0  4  4  4  0
##        T30  2  0  2  0  4  4  0  0
##        T31  0  0  4  0  0  4  4  0
##        T32  4  0  0  0  4  4  0  4
##        T33  4  0  4  0  0  4  4  0
##        T34  0  0  4  0  0  4  2  2
##        T35  4  0  0  0  4  4  4  0

Make a dataset with individual averages by week

data_wk_avg_filt <- data_filt %>%
  group_by(tadpole_ID, week) %>%
  summarise(avg_speed_mm = mean(avg_speed_mm, na.rm = TRUE),
            mob_avg_speed_mm = mean(mob_avg_speed_mm, na.rm = TRUE),
            mobility_rate = mean(mobility_rate, na.rm = TRUE),
            exploration_rate_explored_div_by_total = mean(exploration_rate_explored_div_by_total, na.rm = TRUE),
            total_distance_mm = mean(total_distance_mm, na.rm = TRUE),
            total_time_frozen_s = mean(total_time_frozen_s, na.rm = TRUE),
            lat_emerge = mean(lat_emerge, na.rm = TRUE),
            time_uncovered = mean(time_uncovered, na.rm = TRUE),
            num_crossing = mean(num_crossing, na.rm = TRUE),
            .groups = "drop"    #ungroup after summarise
  )

Missing week data

open field assays

data_wk_avg_filt %>%
  ungroup() %>%  
  complete(tadpole_ID, week = 1:4) %>%
  filter(!is.na(mob_avg_speed_mm)) %>%
  select(tadpole_ID, week) %>%
  table()

##           week
## tadpole_ID 1 2 3 4
##        T6  1 1 1 1
##        T7  1 1 1 1
##        T9  1 1 1 1
##        T10 1 1 1 0
##        T11 1 1 1 1
##        T12 1 1 1 1
##        T14 1 1 1 1
##        T15 1 1 1 1
##        T16 1 1 1 1
##        T17 1 1 1 0
##        T18 1 1 1 0
##        T19 1 1 0 0
##        T20 1 1 1 0
##        T21 1 1 1 0
##        T22 1 1 1 0
##        T23 1 1 1 0
##        T24 1 1 1 0
##        T25 1 1 1 0
##        T26 1 1 1 0
##        T27 1 1 1 0
##        T28 1 1 1 0
##        T29 1 1 1 1
##        T30 1 1 1 0
##        T31 1 1 1 0
##        T32 1 1 1 1
##        T33 1 1 1 1
##        T34 1 1 1 0
##        T35 1 1 1 1

shelter assays

data_wk_avg_filt %>%
  ungroup() %>%  
  complete(tadpole_ID, week = 1:4) %>%
  filter(!is.na(num_crossing)) %>%
  select(tadpole_ID, week) %>%
  table()

##           week
## tadpole_ID 1 2 3 4
##        T6  1 1 1 1
##        T7  0 1 1 1
##        T9  0 1 1 1
##        T10 1 1 1 0
##        T11 1 1 1 1
##        T12 1 1 1 1
##        T14 1 1 1 1
##        T15 1 1 1 1
##        T16 1 1 1 1
##        T17 1 1 1 0
##        T18 1 1 1 0
##        T19 1 1 0 0
##        T20 1 1 1 0
##        T21 1 1 1 0
##        T22 1 1 1 0
##        T23 1 1 1 0
##        T24 1 1 1 0
##        T25 1 1 1 0
##        T26 1 1 1 0
##        T27 1 1 1 0
##        T28 1 1 1 0
##        T29 1 1 1 1
##        T30 1 1 1 0
##        T31 1 1 1 0
##        T32 1 1 1 1
##        T33 1 1 1 1
##        T34 1 1 1 0
##        T35 1 1 1 1

Raw data at a glance

Probably too many now to be comprehensible, but let’s still look at them!

Average speed

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = avg_speed_mm,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Average Speed (mm/sec)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#ggsave("Box_average speed.png", width = 14, height = 4.75, dpi = 300)

Average speed when moving

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = mob_avg_speed_mm,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Average Speed when moving (mm/sec)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Proportion time moving

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = mobility_rate,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Tadpole ID", y = "Proportion time moving (%)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Proportion area explored

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = exploration_rate_explored_div_by_total,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Tadpole ID", y = "Proportion area explored (%)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#ggsave("Box_area explored.png", width = 14, height = 4.75, dpi = 300)

Total distance traveled

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = total_distance_mm,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Total distance traveled (mm)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

### Total time frozen

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = total_time_frozen_s,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Total time frozen (sec)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Latency to emerge

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = lat_emerge,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Latency to emerge (sec)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Time exposed (out of shelter)

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = time_uncovered,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "Time exposed (sec)", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Number of Crossovers

ggplot(data_filt,
       aes(x = tadpole_ID,
           y = num_crossing,
           fill = tadpole_ID) )+
  geom_boxplot(aes(alpha = as.factor(week)), position = position_dodge(width = 0.8)) +
  geom_point(aes(group = as.factor(week)),
    position = position_dodge(width = 0.8),
             alpha = 0.5, size = 1) +
  
  scale_fill_manual(values = rep(c("#3F51B5", "#B7410E"), length.out = 31), guide = "none") +
  scale_alpha_manual(
    values = c("1" = 1, "2" = 0.75, "3" = 0.5, "4" = 0.25, "5" = 0),
    name = "Week",
    guide = guide_legend(override.aes = list(fill = "grey40"))  # make legend boxes visible
    )+
  
  labs(x = "Tadpole ID", y = "# times crossing boundary", alpha = "Week") +
  theme_classic(base_size = 16) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Repeatability within each week

make a function to calculate repeatability of each week and combine to a data frame

rpt_by_wk <- function(data, var) {
  
  data %>%
    filter(week %in% 1:5) %>% # note that many don't have wk 5 data
    
    group_by(week) %>%
    
    group_map(~ { #apply to each group
      rpt_result <- rpt(
        reformulate("1 | tadpole_ID", response = var),
        grname = "tadpole_ID",
        data = .x,
        datatype = "Gaussian",
        nboot = 1000,
        npermut = 1000)
      rpt_summary <- data.frame(week = .y$week, summary(rpt_result)$rpt, row.names = NULL)
      return(rpt_summary)
      }) %>%
    
    bind_rows()
}

Activity - average speed when moving

rpt_avg_mobspeed <- rpt_by_wk(data_filt, "mob_avg_speed_mm")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_avg_mobspeed %>% kable(digits = 3)

week	R	SE	X2.5.	X97.5.	P_permut	LRT_P
1	0.372	0.101	0.158	0.546	0.001	1
2	0.564	0.098	0.334	0.719	0.001	1
3	0.641	0.085	0.443	0.762	0.001	1
4	0.598	0.148	0.224	0.812	0.001	1

Activity - total distance traveled

rpt_dist <- rpt_by_wk(data_filt, "total_distance_mm")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_dist %>% kable(digit = 3)

week	R	SE	X2.5.	X97.5.	P_permut	LRT_P
1	0.319	0.104	0.101	0.501	0.002	1
2	0.656	0.084	0.452	0.785	0.001	1
3	0.697	0.077	0.521	0.821	0.001	1
4	0.621	0.147	0.227	0.803	0.001	1

Activity - number of crossovers

rpt_cross <- rpt_by_wk(data_filt, "num_crossing")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_cross %>% kable(digit = 3)

week	R	SE	X2.5.	X97.5.	P_permut	LRT_P
1	0.381	0.107	0.154	0.563	0.001	1
2	0.668	0.083	0.469	0.791	0.001	1
3	0.716	0.076	0.536	0.825	0.001	1
4	0.373	0.164	0.004	0.650	0.011	1

Boldness - latency to emerage

rpt_lat_emerge <- rpt_by_wk(data_filt, "lat_emerge")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_lat_emerge %>% kable(digits = 3)

week	R	SE	X2.5.	X97.5.	P_permut	LRT_P
1	0.390	0.108	0.169	0.574	0.002	1
2	0.108	0.088	0.000	0.308	0.120	1
3	0.183	0.100	0.000	0.384	0.020	1
4	0.000	0.081	0.000	0.284	1.000	1

Boldness - Time exposed

rpt_time_uncovered <- rpt_by_wk(data_filt, "time_uncovered")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_time_uncovered %>% kable(digits = 3)

week	R	SE	X2.5.	X97.5.	P_permut	LRT_P
1	0.487	0.103	0.251	0.653	0.001	1
2	0.414	0.103	0.197	0.589	0.001	1
3	0.173	0.097	0.000	0.354	0.036	1
4	0.175	0.141	0.000	0.470	0.081	1

Exploration - area explored

rpt_exploration_prop <- rpt_by_wk(data_filt, "exploration_rate_explored_div_by_total")

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_exploration_prop  %>% kable(digit = 3)

week	R	SE	X2.5.	X97.5.	P_permut
1	0.420	0.103	0.210	0.599	0.001
2	0.606	0.090	0.392	0.749	0.001
3	0.779	0.063	0.628	0.874	0.001
4	0.640	0.137	0.283	0.822	0.001

Figure: repeatability changes over time

Intepretation: personality becomes more solidified as the tadpole develops in activity, exploration, but not boldness

combine all results in to one data frame

rpt_all <- bind_rows(
  mutate(rpt_lat_emerge, variable = "Boldness: Latency to emerge"),
  mutate(rpt_time_uncovered, variable = "Boldness: Time exposed"),
  
  mutate(rpt_exploration_prop, variable = "Exploration: area explored"),
  
  mutate(rpt_avg_mobspeed, variable = "Activity: Avg Speed when moving"),
  mutate(rpt_dist, variable = "Activity: Total distance traveled"),
  mutate(rpt_cross, variable = "Activity: Number of crossovers")
)

ggplot(rpt_all, aes(x = week, y = R, color = variable)) +
  geom_line(size = 1, position = position_dodge(width = 0.3)) +
  geom_point(size = 2, position = position_dodge(width = 0.3)) +
  geom_errorbar(aes(ymin = R - SE, ymax = R + SE), 
                width = 0.3, alpha = 0.5, size = 0.8,
                position = position_dodge(width = 0.3)) +
  scale_color_manual(values = hcl.colors(6, palette = "Mako"))+
  labs(x = "Week", y = "Repeatability (R)", color = "Behavior") +
  theme_classic(base_size = 14)

Repeatability across development

make function for doing repeatability analysis of different variables together

rpt_multi_vars <- function(data, vars) {
  
  vars %>%
    purrr::map_df(~ {   #map + row binding
      rpt_result <- rpt(
        reformulate("1 | tadpole_ID", response = .x),
        grname = "tadpole_ID",
        data = data,
        datatype = "Gaussian",
        nboot = 1000,
        npermut = 1000
      )
      
      rpt_summary <- data.frame(variable = .x,
                                summary(rpt_result)$rpt,
                                row.names = NULL)
      
      return(rpt_summary)
    })
}

Repeatability across all assays

rpt_overall_summary <- rpt_multi_vars(data_filt, 
                                      c("mob_avg_speed_mm", "total_distance_mm","num_crossing",  #activity variables
                                        "exploration_rate_explored_div_by_total",                #exploration variable
                                        "lat_emerge", "time_uncovered"                           #boldness variables
                                        )
                                      )

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_overall_summary %>% kable(digit = 3)

variable	R	SE	X2.5.	X97.5.	P_permut	LRT_P
mob_avg_speed_mm	0.232	0.063	0.110	0.362	0.001	0.000
total_distance_mm	0.219	0.061	0.106	0.335	0.001	0.000
num_crossing	0.124	0.049	0.041	0.226	0.001	0.000
exploration_rate_explored_div_by_total	0.233	0.062	0.109	0.357	0.001	0.000
lat_emerge	0.047	0.031	0.000	0.110	0.022	0.044
time_uncovered	0.112	0.045	0.030	0.202	0.001	0.000

Repeatability of weekly averages

rpt_wk_avg_summary <- rpt_multi_vars(data_wk_avg_filt, 
                                      c("mob_avg_speed_mm", "total_distance_mm","num_crossing",  #activity variables
                                        "exploration_rate_explored_div_by_total",                #exploration variable
                                        "lat_emerge", "time_uncovered"                           #boldness variables
                                        )
                                      )

## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :
## Bootstrap Progress:
## Permutation Progress for tadpole_ID :

rpt_wk_avg_summary %>% kable(digit = 3)

variable	R	SE	X97.5.	P_permut	LRT_P
mob_avg_speed_mm	0.180	0.104	0.397	0.031	0.046
total_distance_mm	0.171	0.107	0.396	0.038	0.046
num_crossing	0.000	0.056	0.188	1.000	0.500
exploration_rate_explored_div_by_total	0.117	0.095	0.316	0.101	0.137
lat_emerge	0.000	0.057	0.183	1.000	1.000
time_uncovered	0.051	0.078	0.249	0.295	0.329

Developmental changes in personality

Figure: averages by week

Color codes: activity blue, exploration green , boldness coral

fig_avg_speed_mov <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = week, y = mob_avg_speed_mm, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Average Speed when \n moving (mm/s)") +
  theme_classic(base_size = 16)


fig_dist <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = week, y = total_distance_mm, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Total distance \n traveled (mm)") +
  theme_classic(base_size = 16)


fig_crossing <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = week, y = num_crossing, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Number of \n crossovers") +
  theme_classic(base_size = 16)


fig_area_explored <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = week, y = exploration_rate_explored_div_by_total, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "palegreen4", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "palegreen4", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "palegreen4", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Proportion area \n explored (%)") +
  scale_y_continuous(labels = scales::percent) +
  theme_classic(base_size = 16)


fig_lat_emerge <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = as.factor(week), y = lat_emerge, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "coral2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "coral2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "coral2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Latency to \n emerge (s)") +
  theme_classic(base_size = 16)



fig_time_uncovered <- 
  data_wk_avg_filt %>% 
  filter(week %in% 1:5) %>%
  ggplot(aes(x = week, y = time_uncovered, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "coral2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "coral2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "coral2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "Time exposed (s)") +
  theme_classic(base_size = 16)

egg::ggarrange(fig_avg_speed_mov,fig_dist, fig_crossing, fig_area_explored, fig_lat_emerge, fig_time_uncovered,
               nrow = 6)

Activity: average speed when moving

mod <- lmer(mob_avg_speed_mm ~ week + (1 | tadpole_ID), data = data_filt)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	13.148	0.716	18.351
week	-1.901	0.217	-8.765

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	76.825	1	0

Activity: total distance traveled

mod <- lmer(total_distance_mm ~ week + (1 | tadpole_ID), data = data_filt)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	5489.570	357.192	15.369
week	-985.764	109.769	-8.980

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	80.647	1	0

Activity: number of crossovers

mod <- lmer(num_crossing ~ week + (1 | tadpole_ID), data = data_filt)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	21.242	1.252	16.967
week	-5.952	0.382	-15.570

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	242.436	1	0

Exploration: total area explored

mod <- lmer(exploration_rate_explored_div_by_total ~ week + (1 | tadpole_ID), data = data)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	0.687	0.034	19.951
week	-0.095	0.005	-18.247

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	332.936	1	0

Boldness: latency to emerge

Technically I should use a Cox model instead of lmer…

mod <- lmer(lat_emerge ~ week + (1 | tadpole_ID), data = data_filt)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	68.748	20.906	3.288
week	26.588	7.935	3.351

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	11.228	1	0.001

Boldness: time exposed

mod <- lmer(time_uncovered ~ week + (1 | tadpole_ID), data = data_filt)
summary(mod)$coefficients %>% kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	226.712	14.518	15.616
week	-45.952	4.906	-9.367

Anova(mod) %>% kable(digits = 3)

	Chisq	Df	Pr(>Chisq)
week	87.734	1	0

Sequence effect

Figure: by trial sequence

fig_avg_speed_mov_seq<-
  ggplot(data_filt, aes(x = trial_seq, y = mob_avg_speed_mm, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 2.5) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2) +
  labs(x = "Trial number", y = "Average Speed when \n moving (mm/s)") +
  theme_classic(base_size = 15)

fig_dist_seq <- 
  ggplot(data_filt, aes(x = trial_seq, y = total_distance_mm, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Trial number", y = "Total distance \n traveled (mm)") +
  theme_classic(base_size = 16)


fig_crossing_seq <- 
  ggplot(data_filt, aes(x = trial_seq, y = num_crossing, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Trial number", y = "Number of \n crossovers") +
  theme_classic(base_size = 16)


fig_area_explored_seq <- 
  ggplot(data_filt, aes(x = trial_seq, y = exploration_rate_explored_div_by_total, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "palegreen4", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "palegreen4", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "palegreen4", width = 0.2, size = 1.5) +
  labs(x = "Trial number", y = "Proportion area \n explored (%)") +
  scale_y_continuous(labels = scales::percent) +
  theme_classic(base_size = 16)
  


fig_lat_emerge_seq <- 
  ggplot(data_filt, aes(x = trial_seq, y = lat_emerge, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "coral2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "coral2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "coral2", width = 0.2, size = 1.5) +
  labs(x = "Trial number", y = "Latency to \n emerge (s)") +
  theme_classic(base_size = 16)
  


fig_time_uncovered_seq <-
  ggplot(data_filt, aes(x = trial_seq, y = time_uncovered, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "coral2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "coral2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "coral2", width = 0.2, size = 1.5) +
  labs(x = "Trial number", y = "Time exposed (s))") +
  theme_classic(base_size = 16)

egg::ggarrange(fig_avg_speed_mov_seq,fig_dist_seq, fig_crossing_seq, fig_area_explored_seq, fig_lat_emerge_seq, fig_time_uncovered_seq,
               nrow = 6)

### Stats: TBD

Behavioral Syndrome

Figure: Correlation matrix

# Pick out variables to plot
vars <- c("mob_avg_speed_mm", 
          "total_distance_mm", 
          "num_crossing", 
          "exploration_rate_explored_div_by_total", 
          "lat_emerge", 
          "time_uncovered")

# Proper names to graph
var_labels <-   c("Average Speed \n when moving (mm/s)", 
                  "Total distance \n traveled (mm)", 
                  "Number of \n crossovers", 
                  "Proportion area \n explored (%)", 
                  "Latency to \n emerge (s)", 
                  "Time exposed (s)")

ggpairs(data_filt, 
        columns = vars,
        columnLabels = var_labels, # This replaces the variable names
        progress = FALSE, 
        upper = list(continuous = wrap("cor", size = 6, align_percent = 0.8)),
        lower = list(continuous = wrap("points", alpha = 0.4, size = 1)),
        diag = list(continuous = wrap("densityDiag", fill = "lightseagreen", alpha = 0.5))) +
  
  theme(strip.text = element_text(size = 14),
        axis.text = element_text(size = 10)) +
  theme_bw()

Principle Component Analysis

#pca in this package seems to not be able to handle na

#prep dataset without NA, but with labels for adding back later
data_pca <- data_filt %>%
  mutate(row_num = row_number()) %>%  #for later to reliably add PC values back
  select(row_num,
         mob_avg_speed_mm, 
         total_distance_mm, 
         num_crossing, 
         exploration_rate_explored_div_by_total, 
         lat_emerge, 
         time_uncovered) %>%
  drop_na()

#PCA
pca <- data_pca %>%
  select(-row_num) %>%
  prcomp(scale. = TRUE)

#extract PC1-3
pca_scores <- as.data.frame(pca$x) %>%
  mutate(row_num = data_pca$row_num) %>%
  select(row_num, PC1, PC2, PC3)

#Add back to original dataset
data_filt <- data_filt %>%
  mutate(row_num = row_number()) %>%
  left_join(pca_scores, by = "row_num") %>%
  select(-row_num) # Clean up the matching row numbers

Variance explained:

summary(pca)

## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8361 1.0443 0.8851 0.62967 0.53752 0.26303
## Proportion of Variance 0.5619 0.1818 0.1306 0.06608 0.04815 0.01153
## Cumulative Proportion  0.5619 0.7437 0.8742 0.94031 0.98847 1.00000

Loadings:

pca$rotation %>% kable (digits = 3)

	PC1	PC2	PC3	PC4	PC5	PC6
mob_avg_speed_mm	-0.481	0.330	-0.223	0.016	-0.292	0.724
total_distance_mm	-0.490	0.237	-0.229	-0.047	-0.434	-0.679
num_crossing	-0.414	-0.320	0.265	-0.790	0.177	0.040
exploration_rate_explored_div_by_total	-0.465	0.213	-0.038	0.316	0.791	-0.106
lat_emerge	0.154	0.740	0.629	-0.175	-0.015	-0.044
time_uncovered	-0.342	-0.373	0.657	0.493	-0.262	0.029

Interpretation: larger PC1 means less active, larger PC2 means more shy

not the most intuitive, but ok for now I guess; could flip sign when graphing

Figure: morph differences

ggplot(data_filt, aes(x = -PC1, y = -PC2, color = morph)) +
  geom_point(alpha = 0.6, size = 3) +
  labs(x = "PC1: Activity (56.2%)", 
       y = "PC2: Boldness (18.2%)", 
       color = "Morph") +
  
  scale_color_manual(values = c("coral1","bisque3","seagreen")) +
  scale_fill_manual(values = c("coral1","bisque3","seagreen")) +
  
  theme_classic(base_size = 16)

Figure: average by week

redo the week dataset (dumb but I’m tired)

data_wk_avg_filt <- data_filt %>%
  group_by(tadpole_ID, week) %>%
  summarise(avg_speed_mm = mean(avg_speed_mm, na.rm = TRUE),
            mob_avg_speed_mm = mean(mob_avg_speed_mm, na.rm = TRUE),
            mobility_rate = mean(mobility_rate, na.rm = TRUE),
            exploration_rate_explored_div_by_total = mean(exploration_rate_explored_div_by_total, na.rm = TRUE),
            total_distance_mm = mean(total_distance_mm, na.rm = TRUE),
            total_time_frozen_s = mean(total_time_frozen_s, na.rm = TRUE),
            lat_emerge = mean(lat_emerge, na.rm = TRUE),
            time_uncovered = mean(time_uncovered, na.rm = TRUE),
            num_crossing = mean(num_crossing, na.rm = TRUE),
            PC1 = mean(PC1, na.rm = TRUE),
            PC2 = mean(PC2, na.rm = TRUE),
            PC3 = mean(PC3, na.rm = TRUE),
            .groups = "drop"    #ungroup after summarise
  )

  data_wk_avg_filt %>% 
  ggplot(aes(x = week, y = -PC1, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "#0072B2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "#0072B2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "#0072B2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "PC1: Activity (56.2%)") +
  theme_classic(base_size = 16)

  data_wk_avg_filt %>% 
  ggplot(aes(x = week, y = -PC2, group = tadpole_ID)) +
  geom_line(color = "grey80", size = 1, alpha = 0.8) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", color = "coral2", size = 1.5) +
  stat_summary(aes(group = 1), fun = mean, geom = "point", color = "coral2", size = 4) +
  stat_summary(aes(group = 1), fun.data = mean_se, geom = "errorbar", color = "coral2", width = 0.2, size = 1.5) +
  labs(x = "Week", y = "PC2: Boldness (18.2%)") +
  theme_classic(base_size = 16)