Peeper Analysis!

NOTES ABOUT DATASETS

So in summary:

DATA = all 30 calls for all males, full set of data (1886 observations)

Tdat = the temperature corrected, all 30 calls (1886 observations)

means = the temperature corrected means for each male (62 observations)

PCA = the means for the variables of interest, one for each male with the ID as the row name (not an actual variable), prepped for input into PCA observations (62 observations)

full.data = means + PC scores for each individual (62 observations)

asprcomp = results of the PCA, run with the 62 observations with the individuals

Packages Used

# for importing Raven selection files
library(Rraven)
# for graphs
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
library(RColorBrewer)
# for importing files
library(readr)
library(readxl)
# for linear regressions
library (lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(fitdistrplus)

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: survival

# for PCA
library (vegan)

## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1

library('factoextra')

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# for graphs
library(extrafont)

## Registering fonts with R

# function for calculating coefficient of variation
cv <- function(x) (sd(x)/mean(x))

# function that gets mode of a character value (e.g. which word is most common)
mode_char <- function(x) unique(x)[which.max(tabulate(match(x, unique(x))))]

Data Import (taken from ‘manual data’ in 071424 file)

DATA <- read_csv("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/080724_data.csv")

## Rows: 1886 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Begin File, ID, Date.x, selec.file, Location.x, Date.y, Location.y...
## dbl (21): Begin Time (s), End Time (s), Delta Time (s), Peak Freq (Hz), Freq...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

DATA <- as.data.frame(DATA)

Corrections

Temperature

# Remember - if you change the name of the column for body temperature, you will need to edit the name within m & the Tcorrected_matrix to accurately reflect the new column name 

Tcorr <- function(col_numbers, target_temp, data){
  num_cols <- length(col_numbers)
  Tcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    m <- lm(data[,col_numbers[i]] ~ data$`Body Temp`, data)$coefficients[2] # Calculate m by doing a linear regression 
    Tcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (data$`Body Temp` - target_temp) # Save a vector so I can use it outside the function 
  }
  
  avg <- apply(Tcorrected_matrix, 1, mean)
  stdev <- apply(Tcorrected_matrix, 1, sd)
  colnames(Tcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Tcorrected_matrix = Tcorrected_matrix)

  return(result)
}

# Set Values that you want to temperature correct for
#   3 = Delta Time
#   4 = Peak Freq
#   5 = Freq 5%
#   6 = Freq 95%
#   7 = BW 90%
#   13 = ICI
#   14 = Call Period
#   15 = Call Duration
selections <- c(3:7,13:15)

# Do the temperature correction to the target temp, aka 14
result <- Tcorr(selections, target_temp = 14, data = DATA)

# Format results as a dataframe
Tdat <- as.data.frame(result$Tcorrected_matrix)

# Add information such as ID, Location, and Prevalence
Tdat<-cbind(DATA[,c(9,10,12,18,23,31,32)], Tdat)
#   9 = ID
#   10 = Date
#   12 = Location
#   18 = Body Temp
#   23 = Frog Weight (g)
#   31 = Log Infection
#   32 = Prevalence

# write.csv(Tdat,"080724_temp_corrected_data.csv",row.names=FALSE)

Body Size

Bcorr <- function(col_numbers, target_svl, data){
  num_cols <- length(col_numbers)
  Bcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    m <- lm(data[,col_numbers[i]] ~ DATA$SVL, data)$coefficients[2] # Calculate m by doing a linear regression 
    Bcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (DATA$SVL - target_svl) # Save a vector so I can use it outside the function 
  }
  
  avg <- apply(Bcorrected_matrix, 1, mean)
  stdev <- apply(Bcorrected_matrix, 1, sd)
  colnames(Bcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Bcorrected_matrix = Bcorrected_matrix)

  return(result)
}

# Do the correction to the target size, aka 27 SVL
result <- Bcorr(col_numbers = c(9:12), target_svl = 27, data = Tdat)

# Format results as a dataframe
Test <- as.data.frame(result$Bcorrected_matrix)

Tdat <- subset(Tdat, select = -c(9:12))
# Add information such as ID, Location, and Prevalence
Tdat<-cbind(Tdat,Test)

write.csv(Tdat,"080724_temp_body_corrected_data.csv",row.names=FALSE)

Summary Data

# summarize means of these parameters by ID
means <- Tdat %>% 
  group_by(ID, Location.x, Prevalence) %>%
  summarize(Temp = mean(`Body Temp`),
            Infection = mean(`Log Infection`),
            Call_Rate_Inst = mean(Call_Rate_Inst),
            Duration = mean(`Delta Time (s)`),
            ICI = mean(ICI),
            Period = mean(Call_Period),
            Freq = mean(`Peak Freq (Hz)`),
            Freq_5 = mean(`Freq 5% (Hz)`),
            Freq_95 = mean(`Freq 95% (Hz)`),
            BW = mean(`BW 90% (Hz)`),
            Weight = mean(`Frog Weight (g)`)
            )

## `summarise()` has grouped output by 'ID', 'Location.x'. You can override using
## the `.groups` argument.

means <- ungroup(means)
FR <- means %>% filter(Location.x=='FR')
CV <- means %>% filter(Location.x=='CV')

t.test(FR$Infection, CV$Infection)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Infection and CV$Infection
## t = -0.60249, df = 59.999, p-value = 0.5491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7500039  0.4027840
## sample estimates:
## mean of x mean of y 
##  1.138851  1.312461

t.test(FR$Call_Rate_Inst, CV$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Call_Rate_Inst and CV$Call_Rate_Inst
## t = -1.9423, df = 55.051, p-value = 0.05723
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.0872147   0.2203254
## sample estimates:
## mean of x mean of y 
##  74.68630  81.61975

t.test(FR$Duration, CV$Duration)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Duration and CV$Duration
## t = 1.7322, df = 56.067, p-value = 0.08873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001373744  0.018938119
## sample estimates:
## mean of x mean of y 
## 0.1127904 0.1040082

t.test(FR$ICI, CV$ICI)

## 
##  Welch Two Sample t-test
## 
## data:  FR$ICI and CV$ICI
## t = 0.054917, df = 46.103, p-value = 0.9564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1974357  0.2085117
## sample estimates:
## mean of x mean of y 
## 0.8597254 0.8541874

t.test(FR$Period, CV$Period)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Period and CV$Period
## t = 0.14294, df = 46.017, p-value = 0.887
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1873376  0.2159779
## sample estimates:
## mean of x mean of y 
## 0.9725158 0.9581956

t.test(FR$Freq, CV$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq and CV$Freq
## t = 1.0152, df = 49.687, p-value = 0.3149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.79341  87.63243
## sample estimates:
## mean of x mean of y 
##  2943.453  2914.033

t.test(FR$Freq_5, CV$Freq_5)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq_5 and CV$Freq_5
## t = 0.35236, df = 54.354, p-value = 0.7259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -50.56418  72.13100
## sample estimates:
## mean of x mean of y 
##  2804.422  2793.638

t.test(FR$Freq_95, CV$Freq_95)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq_95 and CV$Freq_95
## t = 1.1473, df = 52.172, p-value = 0.2565
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26.20089  96.17296
## sample estimates:
## mean of x mean of y 
##  3072.145  3037.159

t.test(FR$BW, CV$BW)

## 
##  Welch Two Sample t-test
## 
## data:  FR$BW and CV$BW
## t = 1.9928, df = 58.002, p-value = 0.05099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1080514 48.5132778
## sample estimates:
## mean of x mean of y 
##  267.7228  243.5202

t.test(FR$Weight, CV$Weight)

## 
##  Welch Two Sample t-test
## 
## data:  FR$Weight and CV$Weight
## t = 2.0912, df = 59.802, p-value = 0.04076
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.007268515 0.327523151
## sample estimates:
## mean of x mean of y 
##  1.603333  1.435938

weight.location.model <- lm(Weight ~ Location.x, means)
summary(weight.location.model)

## 
## Call:
## lm(formula = Weight ~ Location.x, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70333 -0.20333 -0.03594  0.10971  0.89667 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.43594    0.05570  25.782   <2e-16 ***
## Location.xFR  0.16740    0.08007   2.091   0.0408 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3151 on 60 degrees of freedom
## Multiple R-squared:  0.0679, Adjusted R-squared:  0.05237 
## F-statistic: 4.371 on 1 and 60 DF,  p-value: 0.0408

ggplot(means, aes(y = Weight, x = Location.x))+
  geom_boxplot()+
  theme_classic()

DATA %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
                               sd = ~sd(., na.rm = T)
                               )))

##   Body Temp_mean Body Temp_sd SVL_mean   SVL_sd Tibia_mean  Tibia_sd
## 1       15.09512     3.437289 27.52057 1.784544   13.92479 0.9396512
##   Frog Weight (g)_mean Frog Weight (g)_sd
## 1             1.522853          0.3224575

# Body Temp
# Frog Weight (g)
# SVL
# Tibia

DATA %>% group_by(Prevalence) %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
                               sd = ~sd(., na.rm = T)
                               )))

## # A tibble: 2 × 9
##   Prevalence `Body Temp_mean` `Body Temp_sd` SVL_mean SVL_sd Tibia_mean Tibia_sd
##   <chr>                 <dbl>          <dbl>    <dbl>  <dbl>      <dbl>    <dbl>
## 1 Absent                 15.5           3.44     27.5   1.21       14.0    0.854
## 2 Present                14.9           3.42     27.5   1.97       13.9    0.972
## # ℹ 2 more variables: `Frog Weight (g)_mean` <dbl>, `Frog Weight (g)_sd` <dbl>

ggplot(means, aes(y = Weight, x = Prevalence))+
  geom_boxplot()+
  theme_classic()

weight.prevalence.model <- lm(Weight ~ Prevalence, means)
summary(weight.prevalence.model)

## 
## Call:
## lm(formula = Weight ~ Prevalence, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66111 -0.20805 -0.02999  0.20114  1.00114 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.56111    0.07662  20.375   <2e-16 ***
## PrevalencePresent -0.06225    0.09095  -0.684    0.496    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3251 on 60 degrees of freedom
## Multiple R-squared:  0.007746,   Adjusted R-squared:  -0.008791 
## F-statistic: 0.4684 on 1 and 60 DF,  p-value: 0.4964

ggplot(means, aes(y = Weight, x = Prevalence, color = Prevalence))+
  geom_boxplot()+
  theme_classic()+
  facet_grid(.~Location.x)

weight.prevalence.location.model <- lm(Weight ~ Prevalence * Location.x, means)
summary(weight.prevalence.location.model)

## 
## Call:
## lm(formula = Weight ~ Prevalence * Location.x, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70952 -0.19783 -0.06558  0.10217  0.89048 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     1.53333    0.10572  14.504   <2e-16 ***
## PrevalencePresent              -0.13551    0.12470  -1.087    0.282    
## Location.xFR                    0.05556    0.14951   0.372    0.712    
## PrevalencePresent:Location.xFR  0.15614    0.17753   0.880    0.383    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3172 on 58 degrees of freedom
## Multiple R-squared:  0.08691,    Adjusted R-squared:  0.03968 
## F-statistic:  1.84 on 3 and 58 DF,  p-value: 0.1499

ggplot(means, aes(y = Weight, x = Infection))+
  geom_point()+
  theme_classic()+
  facet_grid(.~Location.x)+
  stat_smooth(method = "lm",
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

weight.load.model <- lm(Weight ~ Infection, means)
summary(weight.load.model)

## 
## Call:
## lm(formula = Weight ~ Infection, data = means)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64845 -0.22320 -0.03235  0.18170  1.01736 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.54845    0.06123  25.290   <2e-16 ***
## Infection   -0.02565    0.03681  -0.697    0.489    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.325 on 60 degrees of freedom
## Multiple R-squared:  0.008028,   Adjusted R-squared:  -0.008505 
## F-statistic: 0.4856 on 1 and 60 DF,  p-value: 0.4886

ggplot(means, aes(y = Weight, x = Infection, color = Location.x))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = "lm",
              se = TRUE)

## `geom_smooth()` using formula = 'y ~ x'

weight.load.location.model <- lm(Weight ~ Infection * Location.x, means)
summary(weight.load.location.model)

## 
## Call:
## lm(formula = Weight ~ Infection * Location.x, data = means)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7197 -0.2161 -0.0450  0.1148  0.8692 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.50417    0.08481  17.735   <2e-16 ***
## Infection              -0.05199    0.04851  -1.072    0.288    
## Location.xFR            0.07723    0.11953   0.646    0.521    
## Infection:Location.xFR  0.07125    0.07240   0.984    0.329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.317 on 58 degrees of freedom
## Multiple R-squared:  0.08798,    Adjusted R-squared:  0.04081 
## F-statistic: 1.865 on 3 and 58 DF,  p-value: 0.1456

DATA %>% group_by(Prevalence) %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
                               sd = ~sd(., na.rm = T)
                               )))

## # A tibble: 2 × 9
##   Prevalence `Body Temp_mean` `Body Temp_sd` SVL_mean SVL_sd Tibia_mean Tibia_sd
##   <chr>                 <dbl>          <dbl>    <dbl>  <dbl>      <dbl>    <dbl>
## 1 Absent                 15.5           3.44     27.5   1.21       14.0    0.854
## 2 Present                14.9           3.42     27.5   1.97       13.9    0.972
## # ℹ 2 more variables: `Frog Weight (g)_mean` <dbl>, `Frog Weight (g)_sd` <dbl>

FR2 <- DATA %>% filter(Location.x == 'FR')
CV2 <- DATA %>% filter(Location.x == 'CV')

t.test(FR2$SVL,CV2$SVL)

## 
##  Welch Two Sample t-test
## 
## data:  FR2$SVL and CV2$SVL
## t = -1.1282, df = 1865.6, p-value = 0.2594
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.25171080  0.06786881
## sample estimates:
## mean of x mean of y 
##  27.47232  27.56424

t.test(FR2$Tibia,CV2$Tibia)

## 
##  Welch Two Sample t-test
## 
## data:  FR2$Tibia and CV2$Tibia
## t = 2.0209, df = 1815.2, p-value = 0.04343
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.002594924 0.173219470
## sample estimates:
## mean of x mean of y 
##  13.97094  13.88303

ggplot(DATA, aes(y = Tibia, x = Location.x))+
  geom_boxplot()+
  theme_classic()

ggplot(DATA, aes(y = SVL, x = Location.x))+
  geom_boxplot()+
  theme_classic()

Correlations

cormat <- cor(Tdat[ , c(8:15)])
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

melted_cormat <- melt(cormat)

 get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
 }
 
upper_tri <- get_upper_tri(cormat)

upper_cormat <- melt(upper_tri, na.rm = TRUE)

ggplot(data = upper_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()+
  geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)

# results
# SPECTRAL: peak freq is highly correlated with freq 5% and freq 90%
# TEMPORAL: call period and ICI are almost perfectly correlated lol. both also negatively correlated with call rate (makes sense)

cor.test(Tdat$`Frog Weight (g)`, Tdat$`Freq 5% (Hz)`, method = c("pearson"))

## 
##  Pearson's product-moment correlation
## 
## data:  Tdat$`Frog Weight (g)` and Tdat$`Freq 5% (Hz)`
## t = -7.2475, df = 1884, p-value = 6.164e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2082820 -0.1204528
## sample estimates:
##        cor 
## -0.1646938

PCA Analysis

PCA <- means %>% dplyr::select(c('Call_Rate_Inst','Duration','ICI','Period','Freq','Freq_5','Freq_95','BW'))
PCA <- as.data.frame(PCA)
rownames(PCA) <- c(means$ID)

PCA.full <- Tdat %>% dplyr::select(c('Call_Rate_Inst','Delta Time (s)','ICI','Call_Period','Peak Freq (Hz)','Freq 5% (Hz)','Freq 95% (Hz)','BW 90% (Hz)'))
PCA.full <- as.data.frame(PCA.full)

PCA

Doing the PCA

asprcomp <- prcomp(PCA, scale = TRUE) # regular R version
asprcomp

## Standard deviations (1, .., p=8):
## [1] 1.761732e+00 1.558847e+00 1.275686e+00 7.153068e-01 5.147411e-01
## [6] 2.495981e-01 2.141119e-06 1.243539e-15
## 
## Rotation (n x k) = (8 x 8):
##                        PC1         PC2         PC3         PC4         PC5
## Call_Rate_Inst -0.20977688 -0.48220768 -0.22590419 -0.38901620  0.72129635
## Duration       -0.20264941  0.14297984  0.60652038  0.54539909  0.52127578
## ICI             0.29185509  0.52295333 -0.13509349 -0.19666278  0.28516979
## Period          0.28347693  0.53350446 -0.10531740 -0.17036594  0.31331541
## Freq            0.50284691 -0.24059995  0.13990203  0.01041891  0.06749534
## Freq_5          0.50497675 -0.26010433 -0.04372966  0.24254392  0.07447527
## Freq_95         0.49080429 -0.25117525  0.23234676 -0.01960569  0.02027577
## BW             -0.02535633  0.01713845  0.69013654 -0.65129482 -0.13416958
##                        PC6           PC7           PC8
## Call_Rate_Inst -0.02889076 -4.393422e-07  2.920178e-16
## Duration        0.01297750 -1.751072e-07 -3.558077e-02
## ICI            -0.02358669 -1.611560e-07 -7.088976e-01
## Period         -0.02308133 -1.710518e-07  7.044134e-01
## Freq            0.81548752  2.233240e-06  2.354059e-16
## Freq_5         -0.38987902 -6.775339e-01 -1.195418e-11
## Freq_95        -0.41803672  6.829985e-01  1.205034e-11
## BW             -0.07827885 -2.728752e-01 -4.814578e-12

summary(asprcomp)

## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6       PC7
## Standard deviation     1.762 1.5588 1.2757 0.71531 0.51474 0.24960 2.141e-06
## Proportion of Variance 0.388 0.3038 0.2034 0.06396 0.03312 0.00779 0.000e+00
## Cumulative Proportion  0.388 0.6917 0.8951 0.95909 0.99221 1.00000 1.000e+00
##                              PC8
## Standard deviation     1.244e-15
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00

get_eigenvalue(asprcomp)

##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.103701e+00     3.879627e+01                    38.79627
## Dim.2 2.430004e+00     3.037505e+01                    69.17131
## Dim.3 1.627374e+00     2.034217e+01                    89.51348
## Dim.4 5.116639e-01     6.395799e+00                    95.90928
## Dim.5 2.649584e-01     3.311980e+00                    99.22126
## Dim.6 6.229923e-02     7.787404e-01                   100.00000
## Dim.7 4.584391e-12     5.730489e-11                   100.00000
## Dim.8 1.546389e-30     1.932987e-29                   100.00000

# PC1 = Freq (+), Freq_5 (+), Freq_95(+) (38.8%)
# PC2 = Call_Rate Inst (+), ICI (-), Period (-) (69.17%)
# PC3 = Duration (+), BW (+) (89.51%)

# install.packages("devtools")
library(devtools)

## Loading required package: usethis

## 
## Attaching package: 'devtools'

## The following object is masked from 'package:permute':
## 
##     check

# devtools::install_github("arleyc/PCAtest")
library(PCAtest)
result<-PCAtest(PCA, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE)

## 
## Sampling bootstrap replicates... Please wait
## 
## Calculating confidence intervals of empirical statistics... Please wait
## 
## Sampling random permutations... Please wait
## 
## Comparing empirical statistics with their null distributions... Please wait
## 
## ========================================================
## Test of PCA significance: 8 variables, 62 observations
## 100 bootstrap replicates, 100 random permutations
## ========================================================
## 
## Empirical Psi = 10.5221, Max null Psi = 1.9740, Min null Psi = 0.4493, p-value = 0
## Empirical Phi = 0.4335, Max null Phi = 0.1878, Min null Phi = 0.0896, p-value = 0
## 
## Empirical eigenvalue #1 = 3.1037, Max null eigenvalue = 2.00862, p-value = 0
## Empirical eigenvalue #2 = 2.43, Max null eigenvalue = 1.50436, p-value = 0
## Empirical eigenvalue #3 = 1.62737, Max null eigenvalue = 1.37643, p-value = 0
## Empirical eigenvalue #4 = 0.51166, Max null eigenvalue = 1.19391, p-value = 1
## Empirical eigenvalue #5 = 0.26496, Max null eigenvalue = 1.07518, p-value = 1
## Empirical eigenvalue #6 = 0.0623, Max null eigenvalue = 0.88841, p-value = 1
## Empirical eigenvalue #7 = 0, Max null eigenvalue = 0.79225, p-value = 1
## Empirical eigenvalue #8 = 0, Max null eigenvalue = 0.70146, p-value = 1
## 
## PC 1 is significant and accounts for 38.8% (95%-CI:37.2-51) of the total variation
## PC 2 is significant and accounts for 30.4% (95%-CI:22.7-34.4) of the total variation
## PC 3 is significant and accounts for 20.3% (95%-CI:15.1-22.6) of the total variation
## 
## The first 3 PC axes are significant and account for 89.5% of the total variation
## 
## Variables 3, 5, 6, and 7 have significant loadings on PC 1
## Variables 1, 3, and 4 have significant loadings on PC 2
## Variables 2, and 8 have significant loadings on PC 3

fviz_pca_var(asprcomp,axes = c(1, 2))

Scores

scores = scores(asprcomp)
pcs <- as.data.frame(scores) # same as scores
full.data <- cbind(means, pcs)

library(devtools)
library(pracma)

## 
## Attaching package: 'pracma'

## The following object is masked from 'package:vegan':
## 
##     procrustes

## The following object is masked from 'package:car':
## 
##     logit

## The following objects are masked from 'package:Matrix':
## 
##     expm, lu, tril, triu

## The following object is masked from 'package:purrr':
## 
##     cross

rawLoadings     <- asprcomp$rotation[,1:3] %*% diag(asprcomp$sdev, 3, 3)
rotatedLoadings <- varimax(rawLoadings)$loadings
invLoadings     <- t(pracma::pinv(rotatedLoadings))
scores2          <- scale(PCA) %*% invLoadings

rotatedLoadings

## 
## Loadings:
##                [,1]   [,2]   [,3]  
## Call_Rate_Inst        -0.828 -0.311
## Duration       -0.256         0.843
## ICI                    0.965 -0.156
## Period                 0.972 -0.114
## Freq            0.976              
## Freq_5          0.946        -0.248
## Freq_95         0.988         0.102
## BW              0.125         0.873
## 
##                 [,1]  [,2]  [,3]
## SS loadings    2.914 2.569 1.678
## Proportion Var 0.364 0.321 0.210
## Cumulative Var 0.364 0.685 0.895

# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs2 <- as.data.frame(scores2) # same as scores

# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(full.data, pcs2)

PC1

ggplot(data = full.data, aes(x = Infection, y = PC1))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC1 (Spectral Components)",
       title = "Fungal Load x P1")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = V1))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC1 (Spectral Components)",
       title = "Fungal Load x V1")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

hist(full.data$V1)

descdist(full.data$V1, discrete = FALSE, boot = 500)

## summary statistics
## ------
## min:  -2.850035   max:  2.154455 
## median:  -0.08022708 
## mean:  -3.403416e-16 
## estimated sd:  1 
## estimated skewness:  -0.3132949 
## estimated kurtosis:  3.897399

# histogram looks normal, so use a normal regression with location as random variable

# first - regression with numerical variable load
lmodel_V1_load <- lm(V1~Infection, full.data)
summary(lmodel_V1_load)

## 
## Call:
## lm(formula = V1 ~ Infection, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8308 -0.4072 -0.0711  0.6937  2.2001 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.07053    0.18954   0.372    0.711
## Infection   -0.05741    0.11396  -0.504    0.616
## 
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared:  0.004212,   Adjusted R-squared:  -0.01238 
## F-statistic: 0.2538 on 1 and 60 DF,  p-value: 0.6163

# second - regression with categorical variable prevalence
lmodel_V1_prevalence <-lm(V1~Prevalence, full.data)
summary(lmodel_V1_prevalence)

## 
## Call:
## lm(formula = V1 ~ Prevalence, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.80901 -0.41666 -0.06607  0.67173  2.19548 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         0.1003     0.2372   0.423    0.674
## PrevalencePresent  -0.1413     0.2815  -0.502    0.618
## 
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared:  0.004181,   Adjusted R-squared:  -0.01242 
## F-statistic: 0.2519 on 1 and 60 DF,  p-value: 0.6176

PC2

ggplot(data = full.data, aes(x = Infection, y = PC2))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC2 (Temporal Components)",
       title = "Fungal Load x PC2")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = V2))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC2 (Temporal Components)",
       title = "Fungal Load x V2")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

hist(scores2[, 2])

# first - regression with numerical variable load
lmodel_V2_load <- lm(V2~Infection, full.data)
summary(lmodel_V2_load)

## 
## Call:
## lm(formula = V2 ~ Infection, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2725 -0.5461 -0.1083  0.2548  5.6762 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.05394    0.18971   0.284    0.777
## Infection   -0.04391    0.11406  -0.385    0.702
## 
## Residual standard error: 1.007 on 60 degrees of freedom
## Multiple R-squared:  0.002464,   Adjusted R-squared:  -0.01416 
## F-statistic: 0.1482 on 1 and 60 DF,  p-value: 0.7016

# second - regression with categorical variable prevalence
lmodel_V2_prevalence <-lm(V2~Prevalence, full.data)
summary(lmodel_V2_prevalence)

## 
## Call:
## lm(formula = V2 ~ Prevalence, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3327 -0.5253 -0.1624  0.2281  5.6296 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)        -0.1112     0.2370  -0.469    0.641
## PrevalencePresent   0.1567     0.2814   0.557    0.580
## 
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared:  0.005145,   Adjusted R-squared:  -0.01144 
## F-statistic: 0.3103 on 1 and 60 DF,  p-value: 0.5796

PC3

ggplot(data = full.data, aes(x = Infection, y = PC3))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC3 (Bandwidth & Duration Components)",
       title = "Fungal Load x PC3")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

# first - regression with numerical variable load
lmodel_PC3_load <- lm(PC3~Infection, full.data)
summary(lmodel_PC3_load)

## 
## Call:
## lm(formula = PC3 ~ Infection, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3466 -0.9083  0.0878  0.8033  2.4599 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.2109     0.2395   0.881    0.382
## Infection    -0.1717     0.1440  -1.192    0.238
## 
## Residual standard error: 1.271 on 60 degrees of freedom
## Multiple R-squared:  0.02314,    Adjusted R-squared:  0.006861 
## F-statistic: 1.421 on 1 and 60 DF,  p-value: 0.2379

Anova(lmodel_PC3_load)

## Anova Table (Type II tests)
## 
## Response: PC3
##           Sum Sq Df F value Pr(>F)
## Infection  2.297  1  1.4214 0.2379
## Residuals 96.973 60

lmodel_PC3_load_location <-lm(PC3~Infection*Location.x,full.data)
summary(lmodel_PC3_load_location)

## 
## Call:
## lm(formula = PC3 ~ Infection * Location.x, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.99457 -0.66330 -0.01938  0.97808  2.14122 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            -0.29153    0.32789  -0.889   0.3776  
## Infection              -0.07553    0.18754  -0.403   0.6886  
## Location.xFR            0.96968    0.46212   2.098   0.0402 *
## Infection:Location.xFR -0.15405    0.27988  -0.550   0.5842  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.225 on 58 degrees of freedom
## Multiple R-squared:  0.1226, Adjusted R-squared:  0.07721 
## F-statistic: 2.701 on 3 and 58 DF,  p-value: 0.05383

ggplot(full.data,aes(x=Prevalence,y=PC3))+
  geom_boxplot(outlier.shape = NA)+
  geom_jitter(position=position_jitter(0.1))+
  theme_classic()

# second - regression with prevalence as factor
lmodel_PC3_prevalence <-lm(PC3~Prevalence,full.data)
summary(lmodel_PC3_prevalence)

## 
## Call:
## lm(formula = PC3 ~ Prevalence, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2528 -0.9130  0.0393  0.8982  2.5386 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         0.3701     0.2978   1.243    0.219
## PrevalencePresent  -0.5215     0.3535  -1.475    0.145
## 
## Residual standard error: 1.264 on 60 degrees of freedom
## Multiple R-squared:  0.03499,    Adjusted R-squared:  0.01891 
## F-statistic: 2.176 on 1 and 60 DF,  p-value: 0.1454

ggplot(data = full.data, aes(x = Infection, y = V3))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC3 (Bandwidth & Duration Components)",
       title = "Fungal Load x V3")+
  theme(text = element_text(size = 12, family = "serif"))

## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = V3, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

hist(scores2[, 3])

# first - regression with numerical variable load
lmodel_V3_load <- lm(V3~Infection, full.data)
summary(lmodel_V3_load)

## 
## Call:
## lm(formula = V3 ~ Infection, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1434 -0.6762  0.0298  0.7161  2.0619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1562     0.1880   0.831    0.409
## Infection    -0.1271     0.1130  -1.125    0.265
## 
## Residual standard error: 0.9978 on 60 degrees of freedom
## Multiple R-squared:  0.02066,    Adjusted R-squared:  0.004333 
## F-statistic: 1.265 on 1 and 60 DF,  p-value: 0.2651

Anova(lmodel_V3_load)

## Anova Table (Type II tests)
## 
## Response: V3
##           Sum Sq Df F value Pr(>F)
## Infection   1.26  1  1.2655 0.2651
## Residuals  59.74 60

# second - regression with categorical variable prevalence
lmodel_V3_prevalence <-lm(V3~Prevalence,full.data)
summary(lmodel_V3_prevalence)

## 
## Call:
## lm(formula = V3 ~ Prevalence, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.07470 -0.67891  0.01612  0.63391  2.05429 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         0.2722     0.2340   1.163    0.249
## PrevalencePresent  -0.3835     0.2777  -1.381    0.172
## 
## Residual standard error: 0.9926 on 60 degrees of freedom
## Multiple R-squared:  0.0308, Adjusted R-squared:  0.01465 
## F-statistic: 1.907 on 1 and 60 DF,  p-value: 0.1724

ggplot(full.data,aes(x=Prevalence,y=V3))+
  geom_boxplot(outlier.shape = NA)+
  geom_jitter(position=position_jitter(0.1))+
  theme_classic()

# second - prevalence with location
lmodel_V3_prevalence_location <-lm(V3~Prevalence*Location.x,full.data)
summary(lmodel_V3_prevalence_location)

## 
## Call:
## lm(formula = V3 ~ Prevalence * Location.x, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89204 -0.63098 -0.07066  0.75074  1.79591 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     -0.1493     0.3194  -0.467   0.6420  
## PrevalencePresent               -0.1980     0.3767  -0.526   0.6012  
## Location.xFR                     0.8428     0.4517   1.866   0.0671 .
## PrevalencePresent:Location.xFR  -0.3486     0.5363  -0.650   0.5183  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9581 on 58 degrees of freedom
## Multiple R-squared:  0.1272, Adjusted R-squared:  0.08203 
## F-statistic: 2.817 on 3 and 58 DF,  p-value: 0.04693

ggplot(full.data,aes(x=Prevalence,y=V3))+
  facet_wrap(~Location.x,strip.position="bottom")+
  geom_boxplot(outlier.shape = NA)+
  geom_jitter(position=position_jitter(0.1))+
  theme_classic()

Exploring Duration and BW Componenets

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
  geom_point()+
  theme_classic()+
  theme(legend.position = "none")

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
  theme_classic()+
  theme(legend.position = "none")+
    geom_smooth(method = 'lm',
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = `Log Infection`))+
  geom_point()+
  theme_classic()+
  geom_smooth(aes(group = ID), 
              method = "lm",
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, group = ID, color = Location.x))+
  theme_classic()+
  theme(legend.position = "none")+
    geom_smooth(method = 'lm',
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = Location.x))+
  geom_point()+
  theme_classic()+
    geom_smooth(method = 'lm',
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

ggplot(means, aes(Duration, BW, color = Location.x))+
  geom_point()+
  theme_classic()+
    geom_smooth(method = 'lm',
              se = FALSE)

## `geom_smooth()` using formula = 'y ~ x'

slope.model <- lm(BW ~ Duration * Location.x,means)
Anova(slope.model)

## Anova Table (Type II tests)
## 
## Response: BW
##                     Sum Sq Df F value    Pr(>F)    
## Duration             30558  1 17.0235 0.0001194 ***
## Location.x            2973  1  1.6560 0.2032537    
## Duration:Location.x   1283  1  0.7148 0.4013268    
## Residuals           104112 58                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Graphs pt 2

ggplot(full.data,aes(x =Infection,y = Duration))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = Duration, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

lmodel_duration_load <-lm(Duration~Infection*Location.x,full.data)
summary(lmodel_duration_load)

## 
## Call:
## lm(formula = Duration ~ Infection * Location.x, data = full.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.040505 -0.016103 -0.001656  0.013873  0.051570 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.041e-01  5.325e-03  19.553   <2e-16 ***
## Infection              -7.977e-05  3.046e-03  -0.026   0.9792    
## Location.xFR            1.340e-02  7.504e-03   1.785   0.0795 .  
## Infection:Location.xFR -4.064e-03  4.545e-03  -0.894   0.3750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared:  0.07236,    Adjusted R-squared:  0.02438 
## F-statistic: 1.508 on 3 and 58 DF,  p-value: 0.222

ggplot(full.data,aes(x =Infection,y = BW))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = BW, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")

## `geom_smooth()` using formula = 'y ~ x'

lmodel_bw_load <-lm(BW~Infection*Location.x,full.data)
summary(lmodel_bw_load)

## 
## Call:
## lm(formula = BW ~ Infection * Location.x, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.643  -34.408    1.241   29.110   83.728 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             252.116     12.848  19.622   <2e-16 ***
## Infection                -6.549      7.349  -0.891    0.376    
## Location.xFR             19.379     18.108   1.070    0.289    
## Infection:Location.xFR    3.237     10.967   0.295    0.769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.02 on 58 degrees of freedom
## Multiple R-squared:  0.0778, Adjusted R-squared:  0.0301 
## F-statistic: 1.631 on 3 and 58 DF,  p-value: 0.192

Checking Correlations with PC3

ggplot(full.data, aes(x = BW, y = Duration, color = Infection))+
  geom_point()+
  theme_classic()+
  scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')

ggplot(full.data, aes(x = BW, y = Duration, color = V3))+
  geom_point()+
  theme_classic()+
  scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')

ggplot(full.data, aes(x = Infection, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

ggplot(full.data, aes(x = Infection, y = BW))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

ggplot(full.data, aes(x = V3, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

ggplot(full.data, aes(x = V3, y = BW))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

Linear Models for Infection / Bandwidth

# lm with duration
dur.load <- lm(Duration~Infection, full.data)
dur.prev <- lm(Duration~Prevalence, full.data)
dur.load.location <- lm(Duration~Infection*Location.x, full.data)
dur.prev.location <- lm(Duration~Prevalence*Location.x, full.data)

Anova(dur.load)

## Anova Table (Type II tests)
## 
## Response: Duration
##              Sum Sq Df F value Pr(>F)
## Infection 0.0003758  1  0.9247 0.3401
## Residuals 0.0243858 60

Anova(dur.prev)

## Anova Table (Type II tests)
## 
## Response: Duration
##               Sum Sq Df F value Pr(>F)
## Prevalence 0.0003036  1  0.7449 0.3915
## Residuals  0.0244580 60

Anova(dur.load.location)

## Anova Table (Type II tests)
## 
## Response: Duration
##                         Sum Sq Df F value Pr(>F)
## Infection            0.0002810  1  0.7095 0.4031
## Location.x           0.0010994  1  2.7760 0.1011
## Infection:Location.x 0.0003166  1  0.7993 0.3750
## Residuals            0.0229698 58

Anova(dur.prev.location)

## Anova Table (Type II tests)
## 
## Response: Duration
##                          Sum Sq Df F value  Pr(>F)  
## Prevalence            0.0002794  1  0.7058 0.40431  
## Location.x            0.0011700  1  2.9553 0.09093 .
## Prevalence:Location.x 0.0003261  1  0.8237 0.36787  
## Residuals             0.0229619 58                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# lm with bandwidth

bw.load <- lm(BW~Infection, full.data)
bw.prev <- lm(BW~Prevalence, full.data)
bw.load.location <- lm(BW~Infection*Location.x, full.data)
bw.prev.location <- lm(BW~Prevalence*Location.x, full.data)

Anova(bw.load)

## Anova Table (Type II tests)
## 
## Response: BW
##           Sum Sq Df F value Pr(>F)
## Infection   2714  1  1.1441 0.2891
## Residuals 142309 60

Anova(bw.prev)

## Anova Table (Type II tests)
## 
## Response: BW
##            Sum Sq Df F value Pr(>F)
## Prevalence   5012  1   2.148  0.148
## Residuals  140010 60

Anova(bw.load.location)

## Anova Table (Type II tests)
## 
## Response: BW
##                      Sum Sq Df F value  Pr(>F)  
## Infection              2012  1  0.8727 0.35409  
## Location.x             8369  1  3.6293 0.06173 .
## Infection:Location.x    201  1  0.0871 0.76891  
## Residuals            133739 58                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(bw.prev.location)

## Anova Table (Type II tests)
## 
## Response: BW
##                       Sum Sq Df F value  Pr(>F)  
## Prevalence              4740  1  2.0952 0.15314  
## Location.x              8797  1  3.8889 0.05338 .
## Prevalence:Location.x      4  1  0.0018 0.96671  
## Residuals             131209 58                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TN Herp Presentation Figures

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'

## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Coefficients of Variation

# summarize cvs of these parameters by ID
cvs <- Tdat %>% 
  group_by(ID, Location.x, `Log Infection`, Prevalence) %>%
  summarize(Temp = cv(`Body Temp`),
            Call_Rate_Inst = cv(Call_Rate_Inst),
            Duration = cv(`Delta Time (s)`),
            ICI = cv(ICI),
            Period = cv(Call_Period),
            Freq = cv(`Peak Freq (Hz)`),
            Freq_5 = cv(`Freq 5% (Hz)`),
            Freq_95 = cv(`Freq 95% (Hz)`),
            BW = cv(`BW 90% (Hz)`))

## `summarise()` has grouped output by 'ID', 'Location.x', 'Log Infection'. You
## can override using the `.groups` argument.

cvs

## # A tibble: 62 × 13
## # Groups:   ID, Location.x, Log Infection [62]
##    ID        Location.x `Log Infection` Prevalence  Temp Call_Rate_Inst Duration
##    <chr>     <chr>                <dbl> <chr>      <dbl>          <dbl>    <dbl>
##  1 PC-002-24 FR                   0     Absent         0         0.103    0.0572
##  2 PC-003-24 FR                   0     Absent         0         0.161    0.0816
##  3 PC-004-24 CV                   1.01  Present        0         0.138    0.0710
##  4 PC-005-24 CV                   1.25  Present        0         0.169    0.0552
##  5 PC-006-24 CV                   2.11  Present        0         0.177    0.0688
##  6 PC-007-24 CV                   0     Absent         0         0.153    0.0835
##  7 PC-009-24 FR                   0     Absent         0         0.102    0.0721
##  8 PC-011-24 CV                   0     Absent         0         0.162    0.0323
##  9 PC-012-24 FR                   1.99  Present        0         0.0817   0.0773
## 10 PC-013-24 FR                   0.788 Present        0         0.101    0.0442
## # ℹ 52 more rows
## # ℹ 6 more variables: ICI <dbl>, Period <dbl>, Freq <dbl>, Freq_5 <dbl>,
## #   Freq_95 <dbl>, BW <dbl>

# print cvs as a separate csv file
write.csv(cvs,"peeper_infection_by_ID_cvs.csv",row.names=FALSE)

Analysis with CVs

Summary: no difference in CVs between populations or infected/uninfected individuals.

FRCVs <- cvs %>% filter(Location.x == 'FR')
CVCVs <- cvs %>% filter(Location.x == 'CV')
PresentCVs <- cvs %>% filter(Prevalence == 'Present')
AbsentCVs <- cvs %>% filter(Prevalence == 'Absent')

ggplot(cvs, aes(x = Location.x, y = Freq))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Freq, CVCVs$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Freq and CVCVs$Freq
## t = 1.0061, df = 56.337, p-value = 0.3187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001864403  0.005627529
## sample estimates:
##  mean of x  mean of y 
## 0.01257389 0.01069232

ggplot(cvs, aes(x = Prevalence, y = Freq))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Freq, AbsentCVs$Freq)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Freq and AbsentCVs$Freq
## t = -0.8019, df = 30.002, p-value = 0.4289
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005988508  0.002611655
## sample estimates:
##  mean of x  mean of y 
## 0.01111257 0.01280099

ggplot(cvs, aes(x = Location.x, y = Freq_5))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Freq_5, CVCVs$Freq_5)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Freq_5 and CVCVs$Freq_5
## t = 0.013126, df = 59.204, p-value = 0.9896
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003827815  0.003878369
## sample estimates:
##   mean of x   mean of y 
## 0.007648133 0.007622855

ggplot(cvs, aes(x = Prevalence, y = Freq_5))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Freq_5, AbsentCVs$Freq_5)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Freq_5 and AbsentCVs$Freq_5
## t = -0.42145, df = 35.976, p-value = 0.6759
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004929489  0.003233257
## sample estimates:
##   mean of x   mean of y 
## 0.007388859 0.008236975

ggplot(cvs, aes(x = Location.x, y = Freq_95))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Freq_95, CVCVs$Freq_95)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Freq_95 and CVCVs$Freq_95
## t = -0.96275, df = 59.456, p-value = 0.3396
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004726666  0.001655493
## sample estimates:
##   mean of x   mean of y 
## 0.007014720 0.008550306

ggplot(cvs, aes(x = Prevalence, y = Freq_95))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Freq_95, AbsentCVs$Freq_95)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Freq_95 and AbsentCVs$Freq_95
## t = -1.3481, df = 43.674, p-value = 0.1846
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005164285  0.001025097
## sample estimates:
##   mean of x   mean of y 
## 0.007206430 0.009276024

ggplot(cvs, aes(x = Location.x, y = Call_Rate_Inst))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Call_Rate_Inst, CVCVs$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Call_Rate_Inst and CVCVs$Call_Rate_Inst
## t = -0.9424, df = 58.369, p-value = 0.3499
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08727154  0.03139609
## sample estimates:
## mean of x mean of y 
## 0.1977314 0.2256691

ggplot(cvs, aes(x = Prevalence, y = Call_Rate_Inst))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Call_Rate_Inst, AbsentCVs$Call_Rate_Inst)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Call_Rate_Inst and AbsentCVs$Call_Rate_Inst
## t = 0.95212, df = 44.067, p-value = 0.3462
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0304617  0.0850225
## sample estimates:
## mean of x mean of y 
## 0.2200710 0.1927906

ggplot(cvs, aes(x = Location.x, y = ICI))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$ICI, CVCVs$ICI)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$ICI and CVCVs$ICI
## t = 1.1566, df = 58.855, p-value = 0.2521
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0744415  0.2783666
## sample estimates:
## mean of x mean of y 
## 0.4564452 0.3544827

ggplot(cvs, aes(x = Prevalence, y = ICI))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$ICI, AbsentCVs$ICI)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$ICI and AbsentCVs$ICI
## t = 0.77809, df = 35.64, p-value = 0.4417
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1156235  0.2594857
## sample estimates:
## mean of x mean of y 
## 0.4247026 0.3527715

ggplot(cvs, aes(x = Location.x, y = Period))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Period, CVCVs$Period)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Period and CVCVs$Period
## t = 1.1045, df = 59.109, p-value = 0.2739
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07392361  0.25607257
## sample estimates:
## mean of x mean of y 
## 0.4079865 0.3169120

ggplot(cvs, aes(x = Prevalence, y = Period))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Period, AbsentCVs$Period)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Period and AbsentCVs$Period
## t = 0.81432, df = 36.625, p-value = 0.4207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1035769  0.2426950
## sample estimates:
## mean of x mean of y 
## 0.3811749 0.3116158

ggplot(cvs, aes(x = Location.x, y = Duration))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$Duration, CVCVs$Duration)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$Duration and CVCVs$Duration
## t = 1.8339, df = 45.181, p-value = 0.07326
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001505714  0.032188842
## sample estimates:
##  mean of x  mean of y 
## 0.07392347 0.05858191

ggplot(cvs, aes(x = Prevalence, y = Duration))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$Duration, AbsentCVs$Duration)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$Duration and AbsentCVs$Duration
## t = 0.76379, df = 47.836, p-value = 0.4487
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009741488  0.021674595
## sample estimates:
##  mean of x  mean of y 
## 0.06773747 0.06177092

ggplot(cvs, aes(x = Location.x, y = BW))+
  geom_boxplot()+
  theme_classic()

t.test(FRCVs$BW, CVCVs$BW)

## 
##  Welch Two Sample t-test
## 
## data:  FRCVs$BW and CVCVs$BW
## t = 0.071225, df = 59.62, p-value = 0.9435
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03088406  0.03316433
## sample estimates:
## mean of x mean of y 
## 0.1117418 0.1106017

ggplot(cvs, aes(x = Prevalence, y = BW))+
  geom_boxplot()+
  theme_classic()

t.test(PresentCVs$BW, AbsentCVs$BW)

## 
##  Welch Two Sample t-test
## 
## data:  PresentCVs$BW and AbsentCVs$BW
## t = -1.4231, df = 46.092, p-value = 0.1614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.051086208  0.008768261
## sample estimates:
## mean of x mean of y 
## 0.1050104 0.1261694

CV Table

# use Excel to calculate the means and the sds of the CVs, then format it in a way that makes R happy
cv.table <- read_xlsx("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/Peeper_CVs_Means_SDs.xlsx")

ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
  geom_bar(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
        legend.position = "none")+
  geom_errorbar( aes(ymin = Min, ymax = Max), 
                 data = cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
  labs(x = "Variable",
       y = "CV",
       title = "Variables ordered by CV, with Min and Max CVs")+
  scale_color_brewer(palette = "Set2")

ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
  geom_bar(stat="identity")+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
        legend.position = "none")+
  geom_errorbar( aes(ymin = Means - SDs, ymax = Means + SDs), 
                 data = cv.table, width = 0.2, alpha = 0.3)+
  geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
  labs(x = "Variable",
       y = "CV",
       title = "Variables ordered by CV, with SDs")+
  scale_color_brewer(palette = "Set2")

080724_Clean

Trina Chou

2024-08-07