Peeper Analysis!

NOTES ABOUT DATASETS

So in summary:

DATA = all 30 calls for all males, full set of data (1886 observations)

Tdat = the temperature-corrected data (with the spectral variables also body-size corrected), all 30 calls (1886 observations)

means = the temperature corrected means for each male (62 observations)

PCA = the means for the variables of interest, one row per male with the ID as the row name (not an actual variable), prepped for input into the PCA (62 observations)

full.data = means + PC scores (plus the varimax-rotated scores V1-V3) for each individual (62 observations)

asprcomp = results of the PCA, run on the 62 per-individual means
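
Once the objects below have been built, the observation counts above can be double-checked (a quick sketch, not part of the pipeline):

# confirm the observation counts listed above (run after the objects below exist)
sapply(list(DATA = DATA, Tdat = Tdat, means = means, PCA = PCA, full.data = full.data), nrow)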

Packages Used

# for importing Raven selection files
library(Rraven)
# for graphs
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# for importing files
library(readr)
library(readxl)
# for linear regressions
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(fitdistrplus)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: survival
# for PCA
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1
library('factoextra')
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# for graphs
library(extrafont)
## Registering fonts with R
# function for calculating coefficient of variation
cv <- function(x) (sd(x)/mean(x))

# function that gets mode of a character value (e.g. which word is most common)
mode_char <- function(x) unique(x)[which.max(tabulate(match(x, unique(x))))]
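
A quick illustration of the two helpers on toy values (not from the dataset):

cv(c(10, 12, 14))              # ~0.167, i.e. sd divided by mean
mode_char(c("FR", "CV", "FR")) # "FR", the most common value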

Data Import (taken from ‘manual data’ in 071424 file)

DATA <- read_csv("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/080724_data.csv")
## Rows: 1886 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Begin File, ID, Date.x, selec.file, Location.x, Date.y, Location.y...
## dbl (21): Begin Time (s), End Time (s), Delta Time (s), Peak Freq (Hz), Freq...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DATA <- as.data.frame(DATA)

Corrections

Temperature

# Remember: if you rename the body-temperature column, update `Body Temp` inside Tcorr (both the lm() call that estimates m and the Tcorrected_matrix line) to match the new name

Tcorr <- function(col_numbers, target_temp, data){
  num_cols <- length(col_numbers)
  Tcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    m <- lm(data[,col_numbers[i]] ~ data$`Body Temp`, data)$coefficients[2] # slope of this variable against body temperature
    Tcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (data$`Body Temp` - target_temp) # shift each call to the target temperature
  }
  
  avg <- apply(Tcorrected_matrix, 1, mean)
  stdev <- apply(Tcorrected_matrix, 1, sd)
  colnames(Tcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Tcorrected_matrix = Tcorrected_matrix)

  return(result)
}
# Set Values that you want to temperature correct for
#   3 = Delta Time
#   4 = Peak Freq
#   5 = Freq 5%
#   6 = Freq 95%
#   7 = BW 90%
#   13 = ICI
#   14 = Call Period
#   15 = Call Duration
selections <- c(3:7,13:15)

# Do the temperature correction to the target temp, aka 14
result <- Tcorr(selections, target_temp = 14, data = DATA)

# Format results as a dataframe
Tdat <- as.data.frame(result$Tcorrected_matrix)

# Add information such as ID, Location, and Prevalence
Tdat<-cbind(DATA[,c(9,10,12,18,23,31,32)], Tdat)
#   9 = ID
#   10 = Date
#   12 = Location
#   18 = Body Temp
#   23 = Frog Weight (g)
#   31 = Log Infection
#   32 = Prevalence

# write.csv(Tdat,"080724_temp_corrected_data.csv",row.names=FALSE)
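
As a quick sanity check (a sketch, not part of the original pipeline), a corrected variable should now show essentially zero slope against body temperature, since that slope is exactly what Tcorr removed:

# slope of a temperature-corrected variable vs body temperature; should be ~0
coef(lm(Tdat$`Delta Time (s)` ~ Tdat$`Body Temp`))[2]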

Body Size

Bcorr <- function(col_numbers, target_svl, data){
  num_cols <- length(col_numbers)
  Bcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
  
  for(i in seq_along(col_numbers)){
    # NOTE: SVL is not carried into Tdat, so DATA$SVL from the global environment is used here;
    # this relies on DATA and the data argument having the same row order
    m <- lm(data[,col_numbers[i]] ~ DATA$SVL, data)$coefficients[2] # slope of this variable against SVL
    Bcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (DATA$SVL - target_svl) # shift each call to the target SVL
  }
  
  avg <- apply(Bcorrected_matrix, 1, mean)
  stdev <- apply(Bcorrected_matrix, 1, sd)
  colnames(Bcorrected_matrix) <- colnames(data[, col_numbers])
  
  result <- list(avg = avg, stdev = stdev, Bcorrected_matrix = Bcorrected_matrix)

  return(result)
}
# Do the body-size correction to the target size (SVL = 27)
#   Columns 9:12 of Tdat = Peak Freq, Freq 5%, Freq 95%, BW 90% (the spectral variables)
result <- Bcorr(col_numbers = c(9:12), target_svl = 27, data = Tdat)

# Format results as a dataframe
Test <- as.data.frame(result$Bcorrected_matrix)

Tdat <- subset(Tdat, select = -c(9:12))
# Replace the uncorrected spectral columns with their body-size-corrected versions
Tdat <- cbind(Tdat, Test)

write.csv(Tdat,"080724_temp_body_corrected_data.csv",row.names=FALSE)
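
The analogous check for the body-size correction (again a sketch; DATA$SVL is used because SVL is not carried into Tdat):

# slope of a size-corrected spectral variable vs SVL; should be ~0
coef(lm(Tdat$`Peak Freq (Hz)` ~ DATA$SVL))[2]
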
# summarize means of these parameters by ID
means <- Tdat %>% 
  group_by(ID, Location.x, Prevalence, `Frog Weight (g)`) %>%
  summarize(Temp = mean(`Body Temp`),
            Infection = mean(`Log Infection`),
            Call_Rate_Inst = mean(Call_Rate_Inst),
            Duration = mean(`Delta Time (s)`),
            ICI = mean(ICI),
            Period = mean(Call_Period),
            Freq = mean(`Peak Freq (Hz)`),
            Freq_5 = mean(`Freq 5% (Hz)`),
            Freq_95 = mean(`Freq 95% (Hz)`),
            BW = mean(`BW 90% (Hz)`))
## `summarise()` has grouped output by 'ID', 'Location.x', 'Prevalence'. You can
## override using the `.groups` argument.
means <- ungroup(means)
FR <- means %>% filter(Location.x=='FR')
CV <- means %>% filter(Location.x=='CV')
t.test(FR$Infection, CV$Infection)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Infection and CV$Infection
## t = -0.60249, df = 59.999, p-value = 0.5491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7500039  0.4027840
## sample estimates:
## mean of x mean of y 
##  1.138851  1.312461
t.test(FR$Call_Rate_Inst, CV$Call_Rate_Inst)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Call_Rate_Inst and CV$Call_Rate_Inst
## t = -1.9423, df = 55.051, p-value = 0.05723
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.0872147   0.2203254
## sample estimates:
## mean of x mean of y 
##  74.68630  81.61975
t.test(FR$Duration, CV$Duration)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Duration and CV$Duration
## t = 1.7322, df = 56.067, p-value = 0.08873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001373744  0.018938119
## sample estimates:
## mean of x mean of y 
## 0.1127904 0.1040082
t.test(FR$ICI, CV$ICI)
## 
##  Welch Two Sample t-test
## 
## data:  FR$ICI and CV$ICI
## t = 0.054917, df = 46.103, p-value = 0.9564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1974357  0.2085117
## sample estimates:
## mean of x mean of y 
## 0.8597254 0.8541874
t.test(FR$Period, CV$Period)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Period and CV$Period
## t = 0.14294, df = 46.017, p-value = 0.887
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1873376  0.2159779
## sample estimates:
## mean of x mean of y 
## 0.9725158 0.9581956
t.test(FR$Freq, CV$Freq)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq and CV$Freq
## t = 1.0152, df = 49.687, p-value = 0.3149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.79341  87.63243
## sample estimates:
## mean of x mean of y 
##  2943.453  2914.033
t.test(FR$Freq_5, CV$Freq_5)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq_5 and CV$Freq_5
## t = 0.35236, df = 54.354, p-value = 0.7259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -50.56418  72.13100
## sample estimates:
## mean of x mean of y 
##  2804.422  2793.638
t.test(FR$Freq_95, CV$Freq_95)
## 
##  Welch Two Sample t-test
## 
## data:  FR$Freq_95 and CV$Freq_95
## t = 1.1473, df = 52.172, p-value = 0.2565
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26.20089  96.17296
## sample estimates:
## mean of x mean of y 
##  3072.145  3037.159
t.test(FR$BW, CV$BW)
## 
##  Welch Two Sample t-test
## 
## data:  FR$BW and CV$BW
## t = 1.9928, df = 58.002, p-value = 0.05099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1080514 48.5132778
## sample estimates:
## mean of x mean of y 
##  267.7228  243.5202
# MASS (loaded with fitdistrplus) masks dplyr::select, so call it with the dplyr:: prefix
PCA <- means %>% dplyr::select(c('Call_Rate_Inst','Infection','Duration','ICI','Period','Freq','Freq_5','Freq_95','BW'))
PCA <- as.data.frame(PCA)
rownames(PCA) <- c(means$ID)

PCA.full <- Tdat %>% dplyr::select(c('Call_Rate_Inst','Log Infection','Delta Time (s)','ICI','Call_Period','Peak Freq (Hz)','Freq 5% (Hz)','Freq 95% (Hz)','BW 90% (Hz)'))
PCA.full <- as.data.frame(PCA.full)

Correlations

cormat <- cor(Tdat[ , c(8:15)])
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
melted_cormat <- melt(cormat)

get_upper_tri <- function(cormat){
  cormat[lower.tri(cormat)] <- NA
  return(cormat)
}
 
upper_tri <- get_upper_tri(cormat)

upper_cormat <- melt(upper_tri, na.rm = TRUE)

ggplot(data = upper_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()+
  geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)

# results
# SPECTRAL: peak freq is highly correlated with freq 5% and freq 95%
# TEMPORAL: call period and ICI are almost perfectly correlated; both are also negatively correlated with call rate (as expected)
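
To pull the specific coefficients behind these notes out of the matrix (a sketch, using the column names as they appear in Tdat):

cormat["Peak Freq (Hz)", c("Freq 5% (Hz)", "Freq 95% (Hz)")]
cormat["ICI", "Call_Period"]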

cor.test(Tdat$`Frog Weight (g)`, Tdat$`Freq 5% (Hz)`, method = c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  Tdat$`Frog Weight (g)` and Tdat$`Freq 5% (Hz)`
## t = -7.2475, df = 1884, p-value = 6.164e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2082820 -0.1204528
## sample estimates:
##        cor 
## -0.1646938

PCA Analysis

PCA

asprcomp <- prcomp(PCA, scale = TRUE) # base-R PCA on the centered, scaled variables
asprcomp
## Standard deviations (1, .., p=9):
## [1] 1.763058e+00 1.559440e+00 1.296917e+00 9.717221e-01 7.150851e-01
## [6] 5.115198e-01 2.460488e-01 2.139402e-06 1.254242e-15
## 
## Rotation (n x k) = (9 x 9):
##                        PC1         PC2         PC3         PC4          PC5
## Call_Rate_Inst -0.21196158  0.48203170 -0.19657839  0.14738770 -0.383514498
## Infection      -0.04736242  0.03633548 -0.27081295 -0.95700028 -0.026166299
## Duration       -0.19785210 -0.14956536  0.58798983 -0.14023616  0.545198921
## ICI             0.29282582 -0.52013272 -0.14173100  0.03266812 -0.195102593
## Period          0.28469616 -0.53099854 -0.11293316  0.02579258 -0.168805929
## Freq            0.50183198  0.24232609  0.12477816 -0.08297010  0.007903299
## Freq_5          0.50345032  0.26274982 -0.04416357  0.01377631  0.243619142
## Freq_95         0.49107069  0.25129860  0.22199842 -0.05641808 -0.019896136
## BW             -0.02089951 -0.02339827  0.66531222 -0.17541911 -0.654691548
##                        PC6          PC7           PC8           PC9
## Call_Rate_Inst -0.71697287 -0.034900617 -4.496259e-07  1.552163e-16
## Infection      -0.06790084 -0.044183127 -8.850668e-08 -2.553097e-17
## Duration       -0.52397894  0.004974373 -1.877855e-07 -3.558077e-02
## ICI            -0.28501283 -0.026962646 -1.679128e-07 -7.088976e-01
## Period         -0.31329400 -0.026883025 -1.784455e-07  7.044134e-01
## Freq           -0.08049362  0.812679162  2.275804e-06  3.355827e-17
## Freq_5         -0.06856501 -0.390200525 -6.775339e-01  1.022936e-11
## Freq_95        -0.01762445 -0.419569904  6.829985e-01 -1.031159e-11
## BW              0.12613089 -0.081317883 -2.728752e-01  4.119601e-12
summary(asprcomp)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.7631 1.5594 1.2969 0.9717 0.71509 0.51152 0.24605
## Proportion of Variance 0.3454 0.2702 0.1869 0.1049 0.05682 0.02907 0.00673
## Cumulative Proportion  0.3454 0.6156 0.8025 0.9074 0.96420 0.99327 1.00000
##                              PC8       PC9
## Standard deviation     2.139e-06 1.254e-15
## Proportion of Variance 0.000e+00 0.000e+00
## Cumulative Proportion  1.000e+00 1.000e+00
get_eigenvalue(asprcomp)
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.108373e+00     3.453747e+01                    34.53747
## Dim.2 2.431852e+00     2.702057e+01                    61.55805
## Dim.3 1.681993e+00     1.868881e+01                    80.24686
## Dim.4 9.442438e-01     1.049160e+01                    90.73845
## Dim.5 5.113467e-01     5.681630e+00                    96.42008
## Dim.6 2.616525e-01     2.907250e+00                    99.32733
## Dim.7 6.054002e-02     6.726668e-01                   100.00000
## Dim.8 4.577043e-12     5.085603e-11                   100.00000
## Dim.9 1.573122e-30     1.747913e-29                   100.00000
# PC1 = Freq (+), Freq_5 (+), Freq_95(+) (34.54%)
# PC2 = Call_Rate Inst (+), ICI (-), Period (-) (61.56%)
# PC3 = Duration (+), BW (+) (80.24%)
# PC4 = Infection (-) (90.73%)
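
That interpretation comes from the loading matrix printed above; one quick way to see which variables dominate each axis (a sketch) is to blank out the small loadings:

# show only loadings with absolute value above 0.3 on the first four PCs
loads <- asprcomp$rotation[, 1:4]
round(replace(loads, abs(loads) < 0.3, NA), 2)
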
scores = scores(asprcomp)

fviz_pca_var(asprcomp,axes = c(4, 1))

fviz_pca_var(asprcomp,axes = c(4, 2))

fviz_pca_var(asprcomp,axes = c(4, 3))

# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs <- as.data.frame(scores) # same as scores

# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(means, pcs)
# install.packages("pracma")
library(pracma)
## 
## Attaching package: 'pracma'
## The following object is masked from 'package:vegan':
## 
##     procrustes
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:Matrix':
## 
##     expm, lu, tril, triu
## The following object is masked from 'package:purrr':
## 
##     cross
asprcomp <- prcomp(PCA, scale = TRUE)
rawLoadings     <- asprcomp$rotation[,1:3] %*% diag(asprcomp$sdev, 3, 3)
rotatedLoadings <- varimax(rawLoadings)$loadings
invLoadings     <- t(pracma::pinv(rotatedLoadings))
scores2          <- scale(PCA) %*% invLoadings

# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs2 <- as.data.frame(scores2) # rotated scores as a data frame

# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(full.data, pcs2)
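
The rotated score matrix has no column names, so as.data.frame() assigns the defaults V1, V2, V3; those are the names used for the rotated components in the models below. A quick confirmation (sketch):

names(pcs2) # "V1" "V2" "V3" for the three rotated components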

# install.packages("devtools")
library(devtools)
## Loading required package: usethis
## 
## Attaching package: 'devtools'
## The following object is masked from 'package:permute':
## 
##     check
# devtools::install_github("arleyc/PCAtest")
library(PCAtest)
result<-PCAtest(PCA, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE)
## 
## Sampling bootstrap replicates... Please wait
## 
## Calculating confidence intervals of empirical statistics... Please wait
## 
## Sampling random permutations... Please wait
## 
## Comparing empirical statistics with their null distributions... Please wait
## 
## ========================================================
## Test of PCA significance: 9 variables, 62 observations
## 100 bootstrap replicates, 100 random permutations
## ========================================================
## 
## Empirical Psi = 10.6302, Max null Psi = 2.3947, Min null Psi = 0.6731, p-value = 0
## Empirical Phi = 0.3842, Max null Phi = 0.1824, Min null Phi = 0.0967, p-value = 0
## 
## Empirical eigenvalue #1 = 3.10837, Max null eigenvalue = 2.06183, p-value = 0
## Empirical eigenvalue #2 = 2.43185, Max null eigenvalue = 1.64653, p-value = 0
## Empirical eigenvalue #3 = 1.68199, Max null eigenvalue = 1.40277, p-value = 0
## Empirical eigenvalue #4 = 0.94424, Max null eigenvalue = 1.25707, p-value = 1
## Empirical eigenvalue #5 = 0.51135, Max null eigenvalue = 1.07389, p-value = 1
## Empirical eigenvalue #6 = 0.26165, Max null eigenvalue = 0.98487, p-value = 1
## Empirical eigenvalue #7 = 0.06054, Max null eigenvalue = 0.84442, p-value = 1
## Empirical eigenvalue #8 = 0, Max null eigenvalue = 0.73536, p-value = 1
## Empirical eigenvalue #9 = 0, Max null eigenvalue = 0.59923, p-value = 1
## 
## PC 1 is significant and accounts for 34.5% (95%-CI:32.9-41.6) of the total variation
## PC 2 is significant and accounts for 27% (95%-CI:21.4-32.2) of the total variation
## PC 3 is significant and accounts for 18.7% (95%-CI:14.2-20.8) of the total variation
## 
## The first 3 PC axes are significant and account for 80.2% of the total variation
## 
## Variables 4, 5, 6, 7, and 8 have significant loadings on PC 1
## Variables 1, 4, and 5 have significant loadings on PC 2
## Variables 3, and 9 have significant loadings on PC 3
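
PCAtest reports variables by their column position in PCA; a quick lookup table (sketch) maps those numbers back to names (so variables 4-8 on PC 1 are ICI, Period, Freq, Freq_5, and Freq_95):

# map the variable numbers reported by PCAtest back to the columns of PCA
data.frame(number = seq_along(PCA), variable = names(PCA))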

PC1

ggplot(data = full.data, aes(x = Infection, y = PC1))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC1 (Spectral Components)",
       title = "Fungal Load x PC1")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = V1))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC1 (Spectral Components)",
       title = "Fungal Load x V1")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

hist(full.data$V1)

fit.norm <- fitdist(full.data$V1, "norm")
plot(fit.norm)

descdist(full.data$V1, discrete = FALSE, boot = 500)

## summary statistics
## ------
## min:  -2.673025   max:  2.073538 
## median:  -0.1302353 
## mean:  -3.434752e-16 
## estimated sd:  1 
## estimated skewness:  -0.1994627 
## estimated kurtosis:  3.668347
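
A formal test can back up the visual normality checks (a sketch, not part of the original pipeline):

shapiro.test(full.data$V1) # Shapiro-Wilk test on the rotated PC1 scores
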
# histogram looks normal, so use ordinary (Gaussian) linear regressions below

# first - regression with numerical variable load
lmodel_V1_load <- lm(V1~Infection, full.data)
summary(lmodel_V1_load)
## 
## Call:
## lm(formula = V1 ~ Infection, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.64519 -0.48138 -0.03113  0.65669  2.13941 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.10189    0.18911   0.539    0.592
## Infection   -0.08295    0.11370  -0.730    0.469
## 
## Residual standard error: 1.004 on 60 degrees of freedom
## Multiple R-squared:  0.008792,   Adjusted R-squared:  -0.007728 
## F-statistic: 0.5322 on 1 and 60 DF,  p-value: 0.4685
# second - regression with categorical variable prevalence
lmodel_V1_prevalence <-lm(V1~Prevalence, full.data)
summary(lmodel_V1_prevalence)
## 
## Call:
## lm(formula = V1 ~ Prevalence, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.62271 -0.50701 -0.08252  0.64953  2.12385 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         0.1230     0.2369   0.519    0.606
## PrevalencePresent  -0.1733     0.2812  -0.616    0.540
## 
## Residual standard error: 1.005 on 60 degrees of freedom
## Multiple R-squared:  0.00629,    Adjusted R-squared:  -0.01027 
## F-statistic: 0.3798 on 1 and 60 DF,  p-value: 0.5401

PC2

ggplot(data = full.data, aes(x = Infection, y = PC2))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC2 (Temporal Components)",
       title = "Fungal Load x PC2")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = V2))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC2 (Temporal Components)",
       title = "Fungal Load x PC2")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

hist(scores2[, 2])

# first - regression with numerical variable load
lmodel_V2_load <- lm(V2~Infection, full.data)
summary(lmodel_V2_load)
## 
## Call:
## lm(formula = V2 ~ Infection, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7244 -0.2441  0.0942  0.5079  1.2516 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06572    0.18960  -0.347    0.730
## Infection    0.05350    0.11399   0.469    0.641
## 
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared:  0.003658,   Adjusted R-squared:  -0.01295 
## F-statistic: 0.2203 on 1 and 60 DF,  p-value: 0.6405
# second - regression with categorical variable prevalence
lmodel_V2_prevalence <-lm(V2~Prevalence, full.data)
summary(lmodel_V2_prevalence)
## 
## Call:
## lm(formula = V2 ~ Prevalence, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6807 -0.2164  0.1115  0.5308  1.2678 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)         0.1034     0.2371   0.436    0.664
## PrevalencePresent  -0.1458     0.2815  -0.518    0.606
## 
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared:  0.00445,    Adjusted R-squared:  -0.01214 
## F-statistic: 0.2682 on 1 and 60 DF,  p-value: 0.6065

PC3

ggplot(data = full.data, aes(x = Infection, y = PC3))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC3 (Bandwidth & Duration Components)",
       title = "Fungal Load x PC3")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = V3))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = TRUE)+
  labs(x = "(Log) Fungal Load",
       y = "PC3 (Bandwidth & Duration Components)",
       title = "Fungal Load x PC3")+
  theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = full.data, aes(x = Infection, y = PC3, color = Prevalence))+
  geom_point()+
  theme_classic()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  labs(x = "(Log) Fungal Load",
       y = "PC3 (Bandwidth & Duration Components)",
       title = "Fungal Load x PC3")
## `geom_smooth()` using formula = 'y ~ x'

hist(scores2[, 3])

# first - regression with numerical variable load
lmodel_V3_load <- lm(V3~Infection, full.data)
summary(lmodel_V3_load)
## 
## Call:
## lm(formula = V3 ~ Infection, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.18049 -0.65659  0.06561  0.69576  1.94017 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.3781     0.1781   2.124  0.03784 * 
## Infection    -0.3078     0.1071  -2.875  0.00558 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9453 on 60 degrees of freedom
## Multiple R-squared:  0.1211, Adjusted R-squared:  0.1064 
## F-statistic: 8.266 on 1 and 60 DF,  p-value: 0.005581
# second - regression with categorical variable prevalence
lmodel_PC3_prevalence <-lm(PC3~Prevalence,full.data)
plot(lmodel_PC3_prevalence)

summary(lmodel_PC3_prevalence)
## 
## Call:
## lm(formula = PC3 ~ Prevalence, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.98405 -0.87510  0.03902  0.92960  2.74680 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         0.6542     0.2915   2.245  0.02849 * 
## PrevalencePresent  -0.9218     0.3460  -2.664  0.00989 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.237 on 60 degrees of freedom
## Multiple R-squared:  0.1058, Adjusted R-squared:  0.0909 
## F-statistic: 7.099 on 1 and 60 DF,  p-value: 0.00989
ggplot(full.data,aes(x=Prevalence,y=V3))+
  geom_boxplot(outlier.shape = NA)+
  geom_jitter(position=position_jitter(0.1))+
  theme_classic()

ggplot(full.data,aes(x=Prevalence,y=V3))+
  facet_wrap(~Location.x,strip.position="bottom")+
  geom_boxplot(outlier.shape = NA)+
  geom_jitter(position=position_jitter(0.1))+
  theme_classic()

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
  geom_point()+
  theme_classic()+
  theme(legend.position = "none")

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
  theme_classic()+
  theme(legend.position = "none")+
    geom_smooth(method = 'lm',
              se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, group = ID, color = Location.x))+
  theme_classic()+
  theme(legend.position = "none")+
    geom_smooth(method = 'lm',
              se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = V3, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'

Breaking down by BW and Duration

Graphs

ggplot(full.data,aes(x =Infection,y = Duration))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = Duration, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = BW))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data,aes(x =Infection,y = BW, group = Location.x, color = Location.x))+
  theme_classic()+
  geom_point()+
  geom_smooth(method = 'lm',
              se = FALSE)+
  scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(full.data, aes(x = BW, y = Duration, color = Infection))+
  geom_point()+
  theme_classic()+
  scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')

ggplot(full.data, aes(x = Infection, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

ggplot(full.data, aes(x = PC1, y = Duration))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

ggplot(full.data, aes(x = PC1, y = BW))+
  geom_point()+
  theme_classic()+
  stat_smooth(method = 'lm', formula = y~x, se = FALSE)

Linear Models

# lm with duration
dur.load <- lm(Duration~Infection, full.data)
dur.prev <- lm(Duration~Prevalence, full.data)
dur.load.location <- lm(Duration~Infection*Location.x, full.data)
dur.prev.location <- lm(Duration~Prevalence*Location.x, full.data)

summary(dur.load)
## 
## Call:
## lm(formula = Duration ~ Infection, data = full.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037983 -0.015713  0.000237  0.014621  0.053128 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.110955   0.003798  29.216   <2e-16 ***
## Infection   -0.002196   0.002283  -0.962     0.34    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02016 on 60 degrees of freedom
## Multiple R-squared:  0.01518,    Adjusted R-squared:  -0.001236 
## F-statistic: 0.9247 on 1 and 60 DF,  p-value: 0.3401
summary(dur.prev)
## 
## Call:
## lm(formula = Duration ~ Prevalence, data = full.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.038416 -0.015547 -0.000023  0.014501  0.051607 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.111718   0.004759  23.476   <2e-16 ***
## PrevalencePresent -0.004875   0.005649  -0.863    0.392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02019 on 60 degrees of freedom
## Multiple R-squared:  0.01226,    Adjusted R-squared:  -0.0042 
## F-statistic: 0.7449 on 1 and 60 DF,  p-value: 0.3915
summary(dur.load.location)
## 
## Call:
## lm(formula = Duration ~ Infection * Location.x, data = full.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.040505 -0.016103 -0.001656  0.013873  0.051570 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.041e-01  5.325e-03  19.553   <2e-16 ***
## Infection              -7.977e-05  3.046e-03  -0.026   0.9792    
## Location.xFR            1.340e-02  7.504e-03   1.785   0.0795 .  
## Infection:Location.xFR -4.064e-03  4.545e-03  -0.894   0.3750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared:  0.07236,    Adjusted R-squared:  0.02438 
## F-statistic: 1.508 on 3 and 58 DF,  p-value: 0.222
summary(dur.prev.location)
## 
## Call:
## lm(formula = Duration ~ Prevalence * Location.x, data = full.data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041425 -0.015915 -0.001479  0.014708  0.048599 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.1037859  0.0066324  15.648   <2e-16 ***
## PrevalencePresent               0.0003093  0.0078231   0.040   0.9686    
## Location.xFR                    0.0158635  0.0093796   1.691   0.0962 .  
## PrevalencePresent:Location.xFR -0.0101078  0.0111374  -0.908   0.3679    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared:  0.07268,    Adjusted R-squared:  0.02472 
## F-statistic: 1.515 on 3 and 58 DF,  p-value: 0.2201
# lm with bandwidth

bw.load <- lm(BW~Infection, full.data)
bw.prev <- lm(BW~Prevalence, full.data)
bw.load.location <- lm(BW~Infection*Location.x, full.data)
bw.prev.location <- lm(BW~Prevalence*Location.x, full.data)

summary(bw.load)
## 
## Call:
## lm(formula = BW ~ Infection, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -103.043  -28.328    5.809   26.109   93.602 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  262.479      9.174   28.61   <2e-16 ***
## Infection     -5.900      5.516   -1.07    0.289    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.7 on 60 degrees of freedom
## Multiple R-squared:  0.01871,    Adjusted R-squared:  0.002357 
## F-statistic: 1.144 on 1 and 60 DF,  p-value: 0.2891
summary(bw.prev)
## 
## Call:
## lm(formula = BW ~ Prevalence, data = full.data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -91.376 -32.640   1.494  27.373 104.643 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         269.29      11.39  23.651   <2e-16 ***
## PrevalencePresent   -19.81      13.52  -1.466    0.148    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.31 on 60 degrees of freedom
## Multiple R-squared:  0.03456,    Adjusted R-squared:  0.01847 
## F-statistic: 2.148 on 1 and 60 DF,  p-value: 0.148
summary(bw.load.location)
## 
## Call:
## lm(formula = BW ~ Infection * Location.x, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.643  -34.408    1.241   29.110   83.728 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             252.116     12.848  19.622   <2e-16 ***
## Infection                -6.549      7.349  -0.891    0.376    
## Location.xFR             19.379     18.108   1.070    0.289    
## Infection:Location.xFR    3.237     10.967   0.295    0.769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.02 on 58 degrees of freedom
## Multiple R-squared:  0.0778, Adjusted R-squared:  0.0301 
## F-statistic: 1.631 on 3 and 58 DF,  p-value: 0.192
summary(bw.prev.location)
## 
## Call:
## lm(formula = BW ~ Prevalence * Location.x, data = full.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -104.008  -34.234   -2.317   28.765   92.011 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     257.764     15.854  16.258   <2e-16 ***
## PrevalencePresent               -19.817     18.701  -1.060    0.294    
## Location.xFR                     23.050     22.421   1.028    0.308    
## PrevalencePresent:Location.xFR    1.116     26.623   0.042    0.967    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.56 on 58 degrees of freedom
## Multiple R-squared:  0.09525,    Adjusted R-squared:  0.04846 
## F-statistic: 2.035 on 3 and 58 DF,  p-value: 0.1189

Presentation Figures

(Presentation figures are generated here by code chunks whose source is not echoed.)

Summary Data

DATA %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
                               sd = ~sd(., na.rm = T)
                               )))
##   Body Temp_mean Body Temp_sd SVL_mean   SVL_sd Tibia_mean  Tibia_sd
## 1       15.09512     3.437289 27.52057 1.784544   13.92479 0.9396512
##   Frog Weight (g)_mean Frog Weight (g)_sd
## 1             1.522853          0.3224575
#   18 = Body Temp
#   21 = SVL
#   22 = Tibia
#   23 = Frog Weight (g)