Peeper Analysis!

NOTES ABOUT DATASETS

So in summary:

data = imported results from ALL SELECTIONS, directly from RavenPro tables. contains information from every single male for every single one of his calls.

DATA / manual.data = all 30 calls for all males, full set of data (1886 observations)

Tdat = the temperature corrected, all 30 calls (1886 observations)

means = the temperature corrected means for each male (62 observations)

cvs = the temperature corrected cvs for each male (62 observations)

Packages Used

# for importing Raven selection files
library(Rraven)
# for correlations
library(reshape2)
# for graphs
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(extrafont)

## Registering fonts with R

# for importing files
library(readr)
library(readxl)
# for linear regressions
library (lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(car)

## Warning: package 'car' was built under R version 4.4.1

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(fitdistrplus)

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: survival

# for PCA
library (vegan)

## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1

library('factoextra')

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(PCAtest)
library(cluster)
# for dredge
library(MuMIn)
# for Beecher's statistic
library(IDmeasurer)

# function for calculating coefficient of variation
cv <- function(x) (sd(x)/mean(x))

# function that gets mode of a character value (e.g. which word is most common)
mode_char <- function(x) unique(x)[which.max(tabulate(match(x, unique(x))))]

Datasets

Spectrogram Data

# Import spectrogram data into R from Raven Selection Tables. 
data <- imp_raven(all.data = TRUE, 
                  only.spectro.view = TRUE,
                  path = '~/Raven Pro 1.6/Selections')

Accessory Information

# Import metadata containing body temperature and collection data
meta.data <- read_csv("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/Spring Peeper Data - Collection Data.csv")

## Rows: 73 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): Male ID, Date, Location, Catcher, Recorder, Processer, Prevalence
## dbl (10): Body Temp, Weight (Bag + Frog), Weight (Bag), SVL, Tibia, Frog Wei...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Combining into one giant file

# Take the file (in the table as PC-000-24_MMDDYYYY_XX.wav) and separate into it's parts
data <- data %>% separate(`Begin File`, 
                          c("ID", "Date", "Location.wav"), 
                          sep = "_",
                          remove = FALSE
                          ) 

# New simpler column for location 
data <- data %>% mutate(Location = case_when(`Location.wav`=='FR.wav' ~ 'FR',
                                              `Location.wav`=='CV.wav' ~ 'CV'))

# Discard columns not needed for future
data <- data %>% dplyr::select(-c(`View`,`Channel`,`Occupancy`,`Low Freq (Hz)`,`High Freq (Hz)`,`Location.wav`, `Frame Duration`, `Reason`))

# Calculate ICI for values based on strict row calculation.
data <- data %>% mutate(ICI = lead(`Begin Time (s)`)-`End Time (s)`)
data <- data %>% filter (ICI >= 0)


# Calculate Call_Period (Duration + ICI) and Instantaneous Rate (1/Call_Period)*60
data <- data %>% mutate(Call_Period = `Delta Time (s)`+ICI)
data <- data %>% mutate(Call_Rate_Inst = (1/Call_Period)*60)

# Output the resulting files into a CSV, gets written in a working directory. Only have to do this once if nothing before this is being done
# write.csv(data,"data.csv",row.names=FALSE)

# In the CSV file, go through each Raven file and decide which 30 consecutive calls to use. Ideally, in a calling bout, start from the 6th bout and exclude the last call. Want consecutive call numbers. Save this as a new file (e.g. edited.data and import).
import.data <- read_csv("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/edited.data.csv")

## Rows: 1886 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Begin File, ID, Date, selec.file, Location
## dbl (11): Selection, Begin Time (s), End Time (s), Delta Time (s), Peak Freq...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# When importing data, going to go with the manually edited data. 
manual.data <- left_join(import.data, meta.data, by = join_by(ID == `Male ID`))
manual.data <- manual.data %>% dplyr::select(-Selection)
manual.data <- as.data.frame(manual.data)

write.csv(manual.data, "080724_data.csv",row.names=FALSE)

# Use the edited data sheet that was found using the previous code. Should be the same as manual.data.
DATA <- read_csv("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/080724_data.csv")

## Rows: 1886 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Begin File, ID, Date.x, selec.file, Location.x, Date.y, Location.y...
## dbl (20): Begin Time (s), End Time (s), Delta Time (s), Peak Freq (Hz), Freq...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

DATA <- as.data.frame(DATA)

body.condition.model <- lm(data = DATA, `Frog Weight (g)`^(1/3) ~ SVL)
DATA <- DATA %>% mutate(condition = body.condition.model$residuals/SVL)

Correlations

cormat <- cor(DATA[ , c(18, 21, 32, 3, 4, 7, 15)])
melted_cormat <- melt(cormat)

 get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
 }
 
upper_tri <- get_upper_tri(cormat)

upper_cormat <- melt(upper_tri, na.rm = TRUE)

ggplot(data = upper_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()+
  geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)

Temperature + SVL Correction

Temperature Function

# Remember - if you change the name of the column for body temperature, you will need to edit the name within m & the Tcorrected_matrix to accurately reflect the new column name 

Tcorr <- function(col_numbers, target_temp, data){
  num_cols <- length(col_numbers)
  Tcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    m <- lm(data[,col_numbers[i]] ~ data$`Body Temp`, data)$coefficients[2] # Calculate m by doing a linear regression 
    Tcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (data$`Body Temp` - target_temp) # Save a vector so I can use it outside the function 
  }
  
  avg <- apply(Tcorrected_matrix, 1, mean)
  stdev <- apply(Tcorrected_matrix, 1, sd)
  colnames(Tcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Tcorrected_matrix = Tcorrected_matrix)

  return(result)
}

SVL Function

Bcorr <- function(col_numbers, target_svl, data){
  num_cols <- length(col_numbers)
  Bcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    m <- lm(data[,col_numbers[i]] ~ DATA$SVL, data)$coefficients[2] # Calculate m by doing a linear regression 
    Bcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (DATA$SVL - target_svl) # Save a vector so I can use it outside the function 
  }
  
  avg <- apply(Bcorrected_matrix, 1, mean)
  stdev <- apply(Bcorrected_matrix, 1, sd)
  colnames(Bcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Bcorrected_matrix = Bcorrected_matrix)

  return(result)
}

Doing the Corrections

# Set Values that you want to temperature correct for
#   3 = Delta Time
#   4 = Peak Freq
#   5 = Freq 5%
#   6 = Freq 95%
#   7 = BW 90%
#   13 = ICI
#   14 = Call Period
#   15 = Instantaneous Call Rate
selections <- c(3:7,13:15)

# Do the temperature correction to the target temp, aka 14
result <- Tcorr(selections, target_temp = 14, data = DATA)

# Format results as a dataframe
Tdat <- as.data.frame(result$Tcorrected_matrix)

# Add information such as ID, Location, and Prevalence
Tdat <- cbind(DATA[,c(9,10,12,18,21,22,23,30,31,32)],Tdat)
#   9 = ID
#   10 = Date
#   12 = Location
#   18 = Body Temp
#   21 = SVL
#   22 = Tibia
#   23 = Frog Weight (g)
#   30 = Log Infection
#   31 = Prevalence
#   32 = Condition

write.csv(Tdat,"102424_temp_corrected_data.csv",row.names=FALSE)

# Do the correction to the target size, aka 27 SVL
result <- Bcorr(col_numbers = c(11:15, 18), target_svl = 27, data = Tdat)

# 11 = Delta Time
# 12 = Peak Freq (Hz)
# 13 = Freq 5% (Hz)
# 14 = Freq 95% (Hz)
# 15 = BW 90% (Hz)
# 18 = Call_Rate_Inst

# Format results as a dataframe
Test <- as.data.frame(result$Bcorrected_matrix)

# Get rid of the corresponding columns in Tdat so that there are no duplicates
Tdat <- subset(Tdat, select = -c(11:15, 18))

# Bind the body correlated factors to the temperature corrected ones.
Tdat<-cbind(Tdat,Test)

write.csv(Tdat,"102424_temp_body_corrected_data.csv",row.names=FALSE)

CV Table

# use Excel to calculate the means and the sds of the CVs, then format it in a way that makes R happy
cv.table <- read_xlsx("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/Peeper_CVs_Means_SDs.xlsx")

ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
  geom_bar(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
        legend.position = "none")+
  geom_errorbar( aes(ymin = Min, ymax = Max), 
                 data = cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
  labs(x = "Variable",
       y = "CV",
       title = "Variables ordered by CV, with Min and Max CVs")+
  scale_color_brewer(palette = "Set2")

ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
  geom_bar(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
        legend.position = "none")+
  geom_errorbar( aes(ymin = Means - SDs, ymax = Means + SDs), 
                 data = cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
  labs(x = "Variable",
       y = "CV",
       title = "Variables ordered by CV, with SDs")+
  scale_color_brewer(palette = "Set2")

subset.cv.table <- cv.table %>% slice(c(1,2,5))

ggplot(subset.cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
  geom_bar(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
        legend.position = "none")+
  geom_errorbar( aes(ymin = Min, ymax = Max), 
                 data = subset.cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
  labs(x = "Variable",
       y = "CV",
       title = "Variables ordered by CV, with Min and Max CVs")+
  scale_color_brewer(palette = "Set2")

subset.cv.table <- subset.cv.table %>% mutate(Variable2 = c("Call Rate", "Call Duration", "Dominant Frequency"))

ggplot(subset.cv.table, aes(x=reorder(Variable2, Means), y=Means,fill=Variable2)) +
  geom_bar(stat="identity")+
  theme_classic()+
  geom_errorbar(aes(ymin = Means - SDs, ymax = Means + SDs), 
                 data = subset.cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), 
            vjust=-0.3, size=3.5, alpha=1,
            family = "Times New Roman")+
  labs(x = "\nVariable",
       y = "CV",
       title = "Variables ordered by CV (with SDs)")+
  scale_color_brewer(palette = "Set2")+
  theme(axis.text.x = element_text(hjust=0.5),
        legend.position = "none",
        text = element_text(family = "Times New Roman"))

?geom_text()

Summary Data

Tdat$Date <- format(as.Date(Tdat$Date.x, format = "%m%d%y"), "%m/%d/%Y")

Means

# summarize means of these parameters by ID
means <- Tdat %>% 
  dplyr::group_by(ID, Date, Location.x, SVL, Tibia, Prevalence) %>%
  dplyr::summarize(Temp = mean(`Body Temp`),
            Weight = mean(`Frog Weight (g)`),
            Infection = mean(`Log Infection`),
            Condition = mean(condition),
            Call_Rate_Inst = mean(Call_Rate_Inst),
            Duration = mean(`Delta Time (s)`),
            ICI = mean(ICI),
            Period = mean(Call_Period),
            Freq = mean(`Peak Freq (Hz)`),
            Freq_5 = mean(`Freq 5% (Hz)`),
            Freq_95 = mean(`Freq 95% (Hz)`),
            BW = mean(`BW 90% (Hz)`),
            Load = 10^(Infection))

## `summarise()` has grouped output by 'ID', 'Date', 'Location.x', 'SVL', 'Tibia'.
## You can override using the `.groups` argument.

means <- ungroup(means)

FR <- means %>% filter(Location.x=='FR')
CV <- means %>% filter(Location.x=='CV')

write.csv(means,"peeper_infection_by_ID_means.csv",row.names=FALSE)

means %>% dplyr::summarize(weight.mean = mean(Weight),
                           weight.sd = sd(Weight),
                           svl.mean = mean(SVL),
                           svl.sd = sd(SVL),
                           tibia.mean = mean(Tibia),
                           tibia.sd = sd(Tibia),
                           load.mean = mean(Load),
                           load.sv = sd(Load),
                           load.max = max(Load),
                           load.min = min(Load))

## # A tibble: 1 × 10
##   weight.mean weight.sd svl.mean svl.sd tibia.mean tibia.sd load.mean load.sv
##         <dbl>     <dbl>    <dbl>  <dbl>      <dbl>    <dbl>     <dbl>   <dbl>
## 1        1.52     0.324     27.4   1.72       13.9    0.940      372.   1681.
## # ℹ 2 more variables: load.max <dbl>, load.min <dbl>

means %>% dplyr::group_by(Location.x) %>% dplyr::summarize(weight.mean = mean(Weight),
                                                           weight.sd = sd(Weight),
                                                           load.mean = mean(Load),
                                                           load.sv = sd(Load))

## # A tibble: 2 × 5
##   Location.x weight.mean weight.sd load.mean load.sv
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 CV                1.44     0.316      564.   2308.
## 2 FR                1.60     0.314      168.    406.

CVs

cvs <- Tdat %>% 
  group_by(ID, Date, Location.x, SVL, Tibia, Prevalence) %>%
  dplyr::summarize(Temp = mean(`Body Temp`),
            Weight = mean(`Frog Weight (g)`),
            Infection = mean(`Log Infection`),
            Condition = mean(condition),
            Call_Rate_Inst = cv(Call_Rate_Inst),
            Duration = cv(`Delta Time (s)`),
            ICI = cv(ICI),
            Period = cv(Call_Period),
            Freq = cv(`Peak Freq (Hz)`),
            Freq_5 = cv(`Freq 5% (Hz)`),
            Freq_95 = cv(`Freq 95% (Hz)`),
            BW = cv(`BW 90% (Hz)`),
            Load = 10^(Infection))

## `summarise()` has grouped output by 'ID', 'Date', 'Location.x', 'SVL', 'Tibia'.
## You can override using the `.groups` argument.

cvs <- ungroup(cvs)

FR.cv <- cvs %>% filter(Location.x=='FR')
CV.cv <- cvs %>% filter(Location.x=='CV')

write.csv(cvs,"peeper_infection_by_ID_cvs.csv",row.names=FALSE)

CR v CVw

ggplot(data = data.frame(x = means$Call_Rate_Inst, y = cvs$Call_Rate_Inst), aes(x = x, y = y))+
  geom_point()+
  theme_minimal()+
  labs(x = "CR",
       y = "CVw")+
  stat_smooth(method = "lm",
              formula = y ~ x + I(x^2),
              se = TRUE)+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14))+
  labs(x = "Call Rate",
       y = "Call Rate Coefficient of Variation")

Testing Differences by Location

Sarah Dataset

library(lubridate)  # gives options more robust to different formats - or else use library(tidyverse)

background <- read_csv("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/pscr_records.csv")

## New names:
## Rows: 174 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): swab_id, bd_status dbl (2): ...1, bd_load date (1): date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

background.new <- read_csv("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/pscr_records_full.csv")

## Rows: 247 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): date, bd_status
## dbl (1): bd_load
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

background.new <- as.data.frame(background.new)
background.new$Date <- mdy(background.new$date)

background %>% group_by(bd_status) %>%
  summarize(n = n(),
            mean = mean(bd_load),
            sd = sd(bd_load),
            max = max(bd_load))

## # A tibble: 2 × 5
##   bd_status     n  mean    sd    max
##   <chr>     <int> <dbl> <dbl>  <dbl>
## 1 negative    123    0     0      0 
## 2 positive     51  802. 2679. 15988.

51/(123+51)

## [1] 0.2931034

ggplot(data = background, aes(x = date, y = bd_load))+
  geom_point()+
  theme_minimal()

ggplot(data = means, aes(x = Date, y = Load))+
  geom_point()+
  theme_minimal()

ggplot(data = background.new, aes(x = Date, y = bd_load))+
  geom_point()+
  theme_minimal()

twentytwo <- background.new %>% filter(between(Date, as.Date('2022-01-01'), as.Date('2022-12-31')))
twentythree <- background.new %>% filter(between(Date, as.Date('2023-01-01'), as.Date('2023-12-31')))
twentyfour <- background.new %>% filter(between(Date, as.Date('2024-01-01'), as.Date('2024-12-31')))
ggplot(data = twentytwo, aes(x = Date, y = bd_load))+
  geom_point()+
  theme_minimal()+
  labs(title = "2022")

ggplot(data = twentythree, aes(x = Date, y = bd_load))+
  geom_point()+
  theme_minimal()+
  labs(title = "2023")

ggplot(data = twentyfour, aes(x = Date, y = bd_load))+
  geom_point()+
  theme_minimal()+
  labs(title = "2024")

T-tests - Meta

t.test(FR$SVL, CV$SVL)

## 
##  Welch Two Sample t-test
## 
## data:  FR$SVL and CV$SVL
## t = 0.099258, df = 59.741, p-value = 0.9213
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8344102  0.9215352
## sample estimates:
## mean of x mean of y 
##  27.47200  27.42844

t.test(FR$Tibia, CV$Tibia)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Tibia and CV$Tibia
## t = 0.53143, df = 58.182, p-value = 0.5971
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3546734  0.6110901
## sample estimates:
## mean of x mean of y 
##  13.97133  13.84313

t.test(FR$Weight, CV$Weight) # p = 0.04076 (FR weigh more)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Weight and CV$Weight
## t = 2.0912, df = 59.802, p-value = 0.04076
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.007268515 0.327523151
## sample estimates:
## mean of x mean of y 
##  1.603333  1.435938

t.test(FR$Temp, CV$Temp) # p = 0.00104 (FR is colder)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Temp and CV$Temp
## t = -3.4895, df = 50.425, p-value = 0.001014
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.536395 -1.222355
## sample estimates:
## mean of x mean of y 
##  13.63000  16.50937

t.test(FR$Infection, CV$Infection)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Infection and CV$Infection
## t = -0.60249, df = 59.999, p-value = 0.5491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7500039  0.4027840
## sample estimates:
## mean of x mean of y 
##  1.138851  1.312461

t.test(FR$Condition, CV$Condition) # p = 0.0259 (FR in better condition)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Condition and CV$Condition
## t = 2.2877, df = 56.877, p-value = 0.0259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0002062304 0.0031034703
## sample estimates:
##     mean of x     mean of y 
##  0.0008430817 -0.0008117687

Means

t.test(FR$Call_Rate_Inst, CV$Call_Rate_Inst) # 0.05821 (FR as slower)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Call_Rate_Inst and CV$Call_Rate_Inst
## t = -1.9333, df = 56.597, p-value = 0.05821
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.0176910   0.2474667
## sample estimates:
## mean of x mean of y 
##  75.20998  82.09510

t.test(FR$Duration, CV$Duration) # 0.0902

## 
##  Welch Two Sample t-test
## 
## data:  FR$Duration and CV$Duration
## t = 1.7245, df = 55.414, p-value = 0.0902
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001417704  0.018928660
## sample estimates:
## mean of x mean of y 
## 0.1125010 0.1037455

t.test(FR$Freq, CV$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq and CV$Freq
## t = 1.0152, df = 49.687, p-value = 0.3149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.79341  87.63243
## sample estimates:
## mean of x mean of y 
##  2943.453  2914.033

CVs

t.test(FR.cv$Call_Rate_Inst, CV.cv$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  FR.cv$Call_Rate_Inst and CV.cv$Call_Rate_Inst
## t = -0.92993, df = 58.408, p-value = 0.3562
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08633059  0.03155612
## sample estimates:
## mean of x mean of y 
## 0.1966349 0.2240222

t.test(FR.cv$Duration, CV.cv$Duration) # p = 0.07271

## 
##  Welch Two Sample t-test
## 
## data:  FR.cv$Duration and CV.cv$Duration
## t = 1.8375, df = 45.252, p-value = 0.07271
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001472842  0.032172354
## sample estimates:
##  mean of x  mean of y 
## 0.07405864 0.05870888

t.test(FR.cv$Freq, CV.cv$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  FR.cv$Freq and CV.cv$Freq
## t = 1.0061, df = 56.337, p-value = 0.3187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001864403  0.005627529
## sample estimates:
##  mean of x  mean of y 
## 0.01257389 0.01069232

Testing Differences by Prevalence

Infection v Condition

ggplot(data = means, aes(x = Infection, y = Condition))+
  geom_point()+
  theme_minimal()+
  stat_smooth(method = "lm",
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

cor.test(means$Infection, means$Condition)

## 
##  Pearson's product-moment correlation
## 
## data:  means$Infection and means$Condition
## t = -0.40223, df = 60, p-value = 0.6889
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2977693  0.2005074
## sample estimates:
##         cor 
## -0.05185789

ggplot(means, aes(x = Infection, y = Condition, color = Location.x))+
  geom_point()+
  theme_minimal()+
  stat_smooth(method = "lm",
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

Data Subsets

absent <- means %>% filter(Prevalence == "Absent")
present <- means %>% filter(Prevalence == "Present")

absent.cvs <- cvs %>% filter(Prevalence == "Absent")
present.cvs <- cvs %>% filter(Prevalence == "Present")

means %>% dplyr::group_by(Prevalence) %>% dplyr::summarize(weight.mean = mean(Weight),
                                                           weight.sd = sd(Weight),
                                                           svl.mean = mean(SVL),
                                                           svl.sd = sd(SVL),
                                                           tibia.mean = mean(Tibia),
                                                           tibia.sd = sd(Tibia),
                                                           load.mean = mean(Load),
                                                           load.sv = sd(Load),
                                                           load.min = min(Load),
                                                           load.max = max(Load))

## # A tibble: 2 × 11
##   Prevalence weight.mean weight.sd svl.mean svl.sd tibia.mean tibia.sd load.mean
##   <chr>            <dbl>     <dbl>    <dbl>  <dbl>      <dbl>    <dbl>     <dbl>
## 1 Absent            1.56     0.334     27.5   1.24       14.0    0.878        1 
## 2 Present           1.50     0.321     27.4   1.89       13.9    0.974      524.
## # ℹ 3 more variables: load.sv <dbl>, load.min <dbl>, load.max <dbl>

T-tests - Meta

t.test(absent$SVL, present$SVL)

## 
##  Welch Two Sample t-test
## 
## data:  absent$SVL and present$SVL
## t = 0.082092, df = 47.716, p-value = 0.9349
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7885493  0.8556705
## sample estimates:
## mean of x mean of y 
##  27.47333  27.43977

t.test(absent$Tibia, present$Tibia)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Tibia and present$Tibia
## t = 0.27987, df = 34.909, p-value = 0.7812
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4441327  0.5861529
## sample estimates:
## mean of x mean of y 
##  13.95556  13.88455

t.test(absent$Weight, present$Weight)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Weight and present$Weight
## t = 0.6732, df = 30.576, p-value = 0.5059
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1264435  0.2509385
## sample estimates:
## mean of x mean of y 
##  1.561111  1.498864

t.test(absent$Temp, present$Temp)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Temp and present$Temp
## t = 0.53987, df = 31.417, p-value = 0.5931
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.479705  2.545867
## sample estimates:
## mean of x mean of y 
##  15.49444  14.96136

t.test(absent$Condition, present$Condition) # no difference in condition between absent v present

## 
##  Welch Two Sample t-test
## 
## data:  absent$Condition and present$Condition
## t = 0.56133, df = 35.578, p-value = 0.5781
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001148318  0.002026738
## sample estimates:
##     mean of x     mean of y 
##  0.0003006626 -0.0001385471

Means

t.test(absent$Call_Rate_Inst, present$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Call_Rate_Inst and present$Call_Rate_Inst
## t = 0.076262, df = 25.225, p-value = 0.9398
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.113764  9.814988
## sample estimates:
## mean of x mean of y 
##  79.01241  78.66180

t.test(absent$Duration, present$Duration)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Duration and present$Duration
## t = 0.89459, df = 34.652, p-value = 0.3772
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006166278  0.015875971
## sample estimates:
## mean of x mean of y 
## 0.1114274 0.1065726

t.test(absent$Freq, present$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  absent$Freq and present$Freq
## t = 0.1947, df = 27.949, p-value = 0.847
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -64.49056  78.03660
## sample estimates:
## mean of x mean of y 
##  2933.075  2926.302

CVs

t.test(absent.cvs$Call_Rate_Inst, present.cvs$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  absent.cvs$Call_Rate_Inst and present.cvs$Call_Rate_Inst
## t = -0.93768, df = 43.595, p-value = 0.3536
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08442222  0.03081858
## sample estimates:
## mean of x mean of y 
## 0.1917496 0.2185515

t.test(absent.cvs$Duration, present.cvs$Duration)

## 
##  Welch Two Sample t-test
## 
## data:  absent.cvs$Duration and present.cvs$Duration
## t = -0.77032, df = 48.086, p-value = 0.4449
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.021648787  0.009655016
## sample estimates:
##  mean of x  mean of y 
## 0.06188033 0.06787721

t.test(absent.cvs$Freq, present.cvs$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  absent.cvs$Freq and present.cvs$Freq
## t = 0.8019, df = 30.002, p-value = 0.4289
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002611655  0.005988508
## sample estimates:
##  mean of x  mean of y 
## 0.01280099 0.01111257

PCA

PCA Analysis

Setting up the Data

PCA <- means %>% dplyr::select(c('Call_Rate_Inst','Duration','Freq'))
PCA <- as.data.frame(PCA)
rownames(PCA) <- c(means$ID)

Doing the PCA

asprcomp <- prcomp(PCA, center = TRUE, scale = TRUE)
summary(asprcomp)

## Importance of components:
##                           PC1    PC2    PC3
## Standard deviation     1.1506 1.0433 0.7665
## Proportion of Variance 0.4413 0.3628 0.1958
## Cumulative Proportion  0.4413 0.8042 1.0000

asprcomp

## Standard deviations (1, .., p=3):
## [1] 1.1506475 1.0433114 0.7664932
## 
## Rotation (n x k) = (3 x 3):
##                       PC1         PC2        PC3
## Call_Rate_Inst -0.4994145  0.67579434 -0.5421136
## Duration        0.7481873  0.02093696 -0.6631572
## Freq           -0.4368077 -0.73679282 -0.5160769

Testing for Significance

result <- PCAtest(PCA, alpha = 0.05, varcorr=TRUE, counter=FALSE, plot=TRUE)

## 
## Sampling bootstrap replicates... Please wait
## 
## Calculating confidence intervals of empirical statistics... Please wait
## 
## Sampling random permutations... Please wait
## 
## Comparing empirical statistics with their null distributions... Please wait
## 
## ========================================================
## Test of PCA significance: 3 variables, 62 observations
## 1000 bootstrap replicates, 1000 random permutations
## ========================================================
## 
## Empirical Psi = 0.2829, Max null Psi = 0.5360, Min null Psi = 6e-04, p-value = 0.028
## Empirical Phi = 0.2172, Max null Phi = 0.2989, Min null Phi = 0.0099, p-value = 0.028
## 
## Empirical eigenvalue #1 = 1.32399, Max null eigenvalue = 1.57552, p-value = 0.082
## Empirical eigenvalue #2 = 1.0885, Max null eigenvalue = 1.15043, p-value = 0.017
## Empirical eigenvalue #3 = 0.58751, Max null eigenvalue = 0.98507, p-value = 0.986
## 
## PC 1 is significant and accounts for 44.1% (95%-CI:40.3-58.9) of the total variation
## 
## Variables , and 2 have significant loadings on PC 1
## 
## Variables , and 2 have significant correlations with PC 1

Visualization

fviz_eig(asprcomp, addlabels = TRUE)

## Registered S3 method overwritten by 'broom':
##   method        from 
##   nobs.multinom MuMIn

fviz_pca_var(asprcomp,axes = c(1, 2))

Extracting Scores

scores = scores(asprcomp)
pcs <- as.data.frame(scores) # same as scores
full.data <- cbind(means, pcs)

K-Means Clusters

Choosing the optimal number of clusters

https://uc-r.github.io/kmeans_clustering

cluster.test <- full.data[, c("PC1", "PC2")]
fviz_nbclust(cluster.test, FUNcluster = kmeans, method = "wss")

fviz_nbclust(cluster.test, FUNcluster = kmeans, method = "silhouette")

gap_stat <- clusGap(cluster.test, FUN = kmeans, nstart = 25,
                    K.max = 10, B = 50)
fviz_gap_stat(gap_stat)

2 Clusters

set.seed(48)  # Set seed for reproducibility
kmeans_result <- kmeans(full.data[, c("PC1", "PC2")], centers = 2)

# Add cluster information to the combined data
full.data$Cluster <- as.factor(kmeans_result$cluster)

# Plot the clusters
ggplot(data = full.data, aes(x = PC1, y = PC2, col = Cluster, label = Prevalence)) +
  geom_point() +
  labs(x = "PC1", y = "PC2", col = "Cluster")+
  theme_minimal()+
  labs(title = "Color by Cluster")+
  geom_text(hjust=0, vjust=0)

fviz_cluster(kmeans_result, data = full.data[, c("PC1", "PC2")],
             geom = "point",
             ellipse.type = "convex", 
             ggtheme = theme_minimal()
             )

full.data <- full.data %>% mutate(Uninfected = if_else(Prevalence == "Absent", "Uninfected", "Infected"))

ggplot(data = full.data, aes (x = Location.x, y = Prevalence, color = Cluster))+
  geom_jitter()+
  theme_minimal()

ggplot(data = full.data, aes (x = Location.x, y = Condition, color = Cluster))+
  geom_jitter()+
  theme_minimal()

ggplot(data = full.data, aes (x = Prevalence, y = Condition, color = Cluster))+
  geom_jitter()+
  theme_minimal()

table(full.data$Cluster, full.data$Prevalence)

##    
##     Absent Present
##   1     10      21
##   2      8      23

chisq.test(full.data$Cluster, full.data$Prevalence)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  full.data$Cluster and full.data$Prevalence
## X-squared = 0.078283, df = 1, p-value = 0.7796

library(infotheo)
mutinformation(full.data$Cluster, full.data$Prevalence)

## [1] 0.002529211

?mutinformation

ggplot(data = full.data, aes(x = PC1, y = PC2)) +
  geom_point(aes(shape = Uninfected), size = 3) +
  scale_shape_manual(values=c(16, 1))+
  labs(x = "PC1 (Call Duration) Score", 
       y = "PC2 (Call Rate & Dominant Frequency) Score", 
       shape = expression("Male"~italic(Bd)~"Status"))+
  theme_minimal()+
  scale_color_brewer(palette = "Set2")+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")

# Coordinates of individuals
ind.coord <- as.data.frame(get_pca_ind(asprcomp)$coord)
# Add clusters obtained using the K-means algorithm
ind.coord$cluster <- factor(kmeans_result$cluster)
# Add Species groups from the original data sett
ind.coord$Uninfected <- full.data$Uninfected

eigenvalue <- round(get_eigenvalue(asprcomp), 1)
variance.percent <- eigenvalue$variance.percent

ggplot(ind.coord, aes(x = Dim.1, y = Dim.2))+
  stat_ellipse(aes(color = cluster))+
  geom_point(aes(shape = Uninfected), size = 3)+
  scale_shape_manual(values=c(16, 1))+
  theme_minimal()+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position = "bottom")+
  labs(x = "PC1 Score",
       y = "PC2 Score",
       color = "Cluster",
       shape = expression("Male"~italic(Bd)~"Status"))

library(ggpubr)

ggscatter(ind.coord, x = "Dim.1", y = "Dim.2", 
  color = "cluster", palette = "npg", ellipse = TRUE, ellipse.type = "convex",
  shape = "Uninfected", size = 3,  legend = "bottom", 
  ggtheme = theme_minimal(),
  xlab = paste0("PC1 Score" ),
  ylab = paste0("PC2 Score" ))+  
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14))+
  scale_shape_manual(values=c(16, 1))+
  labs(color = "Cluster",
       fill = "Cluster")

Looking at individual PCs

PC1

PC1.global.model = lmer(data = full.data, PC1 ~ Infection + Condition + Infection*Condition + (1|Location.x))

Anova(PC1.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: PC1
##                      Chisq Df Pr(>Chisq)  
## Infection           0.2965  1    0.58612  
## Condition           2.7533  1    0.09705 .
## Infection:Condition 4.2440  1    0.03939 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(PC1.global.model)

## Linear mixed model fit by REML ['lmerMod']
## Formula: PC1 ~ Infection + Condition + Infection * Condition + (1 | Location.x)
##    Data: full.data
## 
## REML criterion at convergence: 170.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.42238 -0.53008  0.07755  0.47276  2.86640 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  Location.x (Intercept) 0.009946 0.09973 
##  Residual               1.225436 1.10699 
## Number of obs: 62, groups:  Location.x, 2
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           0.09541    0.22033   0.433
## Infection            -0.06401    0.12561  -0.510
## Condition           -53.73550   81.65736  -0.658
## Infection:Condition  89.83792   43.60838   2.060
## 
## Correlation of Fixed Effects:
##             (Intr) Infctn Condtn
## Infection   -0.699              
## Condition   -0.036  0.015       
## Infctn:Cndt  0.021  0.017 -0.801

# from https://cran.r-project.org/web/packages/interactions/vignettes/interactions.html
library(interactions)
interact_plot(PC1.global.model, pred = Infection, modx = Condition, plot.points = TRUE)+
  theme_minimal()+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")+
  labs("Body Condition",
       x = "Log Infection Load",
       y = "PC1 (Call Duration) Scores")

full.data <- full.data %>% mutate(ConditionType = if_else(Condition >= mean(Condition),"Above Average","Below Average" ))
ggplot(data = full.data, aes(x = Infection, y = PC1, group = ConditionType, color = ConditionType))+
  geom_point()+
  stat_smooth(method = "lm",
              se = FALSE)+
  theme_minimal()+
  labs(x = "Log Infection Load",
       y ="PC1 Score")+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")+
  scale_colour_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = PC1))+
  geom_point()+
  theme_minimal()+
  labs(x = "Log Infection Load",
       y ="PC1 Score (Call Duration)")+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")+
  scale_colour_brewer(palette = "Set2")

PC2

PC2.global.model = lmer(data = full.data, PC2 ~ Infection + Condition + Infection*Condition + (1|Location.x))

Anova(PC2.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: PC2
##                      Chisq Df Pr(>Chisq)
## Infection           0.0906  1     0.7634
## Condition           2.1790  1     0.1399
## Infection:Condition 0.1287  1     0.7198

ggplot(data = full.data, aes(x = Infection, y = PC2))+
  geom_point()+
  theme_minimal()+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")+
  labs(x = "Log Infection Load",
       y = "PC2 Score (Call Rate + Dominant Frequency) ")

Looking at individual variables

Call Rate

call.rate.global.model <- lmer(data = means, Call_Rate_Inst ~ Infection * Condition + (1|Location.x), 
                               REML = FALSE,
                               na.action = "na.fail")

Anova(call.rate.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Call_Rate_Inst
##                      Chisq Df Pr(>Chisq)
## Infection           0.0580  1     0.8097
## Condition           0.2153  1     0.6426
## Infection:Condition 0.8814  1     0.3478

call.rate.cvs.global <- lmer(data = cvs, Call_Rate_Inst ~ Infection * Condition + (1|Location.x),
                             REML = FALSE,
                             na.action = "na.fail")

## boundary (singular) fit: see help('isSingular')

Anova(call.rate.cvs.global)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Call_Rate_Inst
##                      Chisq Df Pr(>Chisq)  
## Infection           0.0376  1    0.84633  
## Condition           5.6328  1    0.01763 *
## Infection:Condition 0.3878  1    0.53344  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ggplot(means, aes(x = Infection, y = Call_Rate_Inst))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(y = "Call Rate (calls / min)",
       x = "Log Infection Load")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(cvs, aes(x = Infection, y = Call_Rate_Inst))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(title = "Call Rate CV")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(cvs, aes(x = Condition, y = Call_Rate_Inst))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(title = "Call Rate CV")

## `geom_smooth()` using formula = 'y ~ x'

Call Duration

call.duration.global.model <- lmer(data = means, Duration ~ Infection + Condition + Infection * Condition + (1|Location.x))

Anova(call.duration.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Duration
##                      Chisq Df Pr(>Chisq)  
## Infection           0.7338  1    0.39166  
## Condition           4.0272  1    0.04477 *
## Infection:Condition 5.9890  1    0.01440 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

call.duration.cvs.model <- lmer(data = cvs, Duration ~ Infection + Condition + Infection * Condition + (1|Location.x))

Anova(call.duration.cvs.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Duration
##                      Chisq Df Pr(>Chisq)
## Infection           0.0145  1     0.9042
## Condition           0.0000  1     0.9993
## Infection:Condition 1.2472  1     0.2641

ggplot(means, aes(x = Infection, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(y = "Call Duration (s)",
       x = "Log Infection Load")

## `geom_smooth()` using formula = 'y ~ x'

interact_plot(call.duration.global.model, pred = Infection, modx = Condition, plot.points = TRUE)+
  theme_minimal()+
  theme(text = element_text(family = "Times New Roman"),
        axis.title=element_text(size=14),
        legend.position="bottom")+
  labs("Body Condition",
       x = "Log Infection Load",
       y = "Call Duration")

ggplot(cvs, aes(x = Infection, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(title = "Duration CVs")

## `geom_smooth()` using formula = 'y ~ x'

Dominant Frequency

freq.global.model <- lmer(data = means, Freq ~ Infection + Condition + Infection * Condition + (1|Location.x))

Anova(freq.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Freq
##                      Chisq Df Pr(>Chisq)
## Infection           0.0853  1     0.7703
## Condition           1.7292  1     0.1885
## Infection:Condition 0.0701  1     0.7912

freq.cvs.global.model <- lmer(data = cvs, Freq ~ Infection + Condition + Infection * Condition + (1|Location.x))

Anova(freq.cvs.global.model)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Freq
##                      Chisq Df Pr(>Chisq)
## Infection           0.0034  1     0.9537
## Condition           0.7975  1     0.3718
## Infection:Condition 0.0000  1     0.9996

ggplot(means, aes(x = Infection, y = Freq/1000))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(y = "Dominant Frequency (kHz)",
       x = "Log Infection Load")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(cvs, aes(x = Infection, y = Freq))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm',
              se = FALSE)+
  theme(text = element_text(family = "Times New Roman"))+
  labs(title = "Frequency CVs")

## `geom_smooth()` using formula = 'y ~ x'

Beecher’s Information Statistic

Google AI Overview:

Beecher’s information statistic (Hs) is a metric used in animal behavior studies to quantify the amount of individual identity information contained within a specific signal, like an animal’s appearance or markings, allowing researchers to compare how well different signals can be used to distinguish between individuals within a population; essentially, it measures how much information a particular signal carries about an individual’s identity

Univariate

BIS <- Tdat %>% dplyr::select(c(1,13,14,18))

BIS <- dplyr::rename(BIS, Duration = `Delta Time (s)`)
BIS <- dplyr::rename(BIS, Frequency = `Peak Freq (Hz)`)

calcHSnpergroup(BIS,sumHS=F)

##             vars Pr   HS
## 1             ID NA   NA
## 2       Duration  0 1.43
## 3      Frequency  0 1.61
## 4 Call_Rate_Inst  0 0.39

Multivariate

PCA_2 <- calcPCA(BIS) 
calcHS(PCA_2)

## HS for significant vars         HS for all vars 
##                    2.89                    2.89

knitr::include_graphics("~/Desktop/UTK/Tanner Lab/2024/Bd Project/Peeper Analysis/Beecher.png")

Shannon’s Entropy

library(DescTools)

## Warning: package 'DescTools' was built under R version 4.4.1

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:car':
## 
##     Recode

Tdat <- Tdat %>% mutate(Presence = if_else(Prevalence == "Absent", 0, 1))
Absent <- Tdat$Presence
Duration <- Tdat$`Delta Time (s)`
Dominant_Freq <- Tdat$`Peak Freq (Hz)`
Call_Rate <- Tdat$Call_Rate_Inst

MutInf(Absent, Duration, base = 2)

## [1] 0.8639203

MutInf(Absent, Dominant_Freq, base = 2)

## [1] 0.8639203

MutInf(Absent, Call_Rate, base = 2)

## [1] 0.8639203

Correlations

cormat2 <- cor(Tdat[ , c(8,13, 14, 18)])
melted_cormat <- melt(cormat2)

upper_tri2 <- get_upper_tri(cormat2)

upper_cormat2 <- melt(upper_tri2, na.rm = TRUE)

ggplot(data = upper_cormat2, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()+
  geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)

cor.test(means$Infection,means$Duration)

## 
##  Pearson's product-moment correlation
## 
## data:  means$Infection and means$Duration
## t = -0.92608, df = 60, p-value = 0.3581
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3578681  0.1350621
## sample estimates:
##        cor 
## -0.1187108

cor.test(means$Infection,means$Freq)

## 
##  Pearson's product-moment correlation
## 
## data:  means$Infection and means$Freq
## t = -0.27266, df = 60, p-value = 0.7861
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2824649  0.2164919
## sample estimates:
##         cor 
## -0.03517855

cor.test(means$Infection,means$Call_Rate_Inst)

## 
##  Pearson's product-moment correlation
## 
## data:  means$Infection and means$Call_Rate_Inst
## t = 0.28808, df = 60, p-value = 0.7743
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2145950  0.2842944
## sample estimates:
##        cor 
## 0.03716526

110724

Trina Chou

Peeper Analysis!

NOTES ABOUT DATASETS

Packages Used

Datasets

Spectrogram Data

Accessory Information

Combining into one giant file

Correlations

Temperature + SVL Correction

Temperature Function

SVL Function

Doing the Corrections

CV Table

Summary Data

Means

CVs

CR v CVw

Testing Differences by Location

Sarah Dataset

T-tests - Meta

Means

CVs

Testing Differences by Prevalence

Infection v Condition

Data Subsets

T-tests - Meta

Means

CVs

PCA

PCA Analysis

Setting up the Data

Doing the PCA

Testing for Significance

Visualization

Extracting Scores

K-Means Clusters

Choosing the optimal number of clusters

2 Clusters

Looking at individual PCs

PC1

PC2

Looking at individual variables

Call Rate

Call Duration

Dominant Frequency

Beecher’s Information Statistic

Univariate

Multivariate

Shannon’s Entropy

Correlations