BIOL 458 Final Project Frog

Investigating the relationship between parasite / pathogen (BdZE) loads and frogs body size (SVL).

# Setting up necessary library and importing data from the two studies
library(tidyverse)
Warning: package 'purrr' was built under R version 4.5.1
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
Warning: package 'patchwork' was built under R version 4.5.2
sfsu_frog <- read.csv("SFSU_FrogsDisease_Data.csv")

anhui_frog_1 <- read.csv("265_2025_3654_MOESM2_ESM.csv")

anhui_frog_2 <- read.csv("265_2025_3654_MOESM4_ESM.csv")

anhui_frog_3 <- read.csv("265_2025_3654_MOESM3_ESM.csv")

Check the data structure and format

summary(sfsu_frog)
      ID              FileCall           Location          Call.Type        
 Length:484         Length:484         Length:484         Length:484        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
 Bd.Positive          Water.Temp        BdZE                SVL       
 Length:484         Min.   :10.0   Min.   :     0.00   Min.   :31.20  
 Class :character   1st Qu.:13.0   1st Qu.:    37.73   1st Qu.:33.30  
 Mode  :character   Median :14.0   Median :  7580.19   Median :34.90  
                    Mean   :14.5   Mean   : 42917.30   Mean   :35.18  
                    3rd Qu.:16.0   3rd Qu.: 43239.52   3rd Qu.:36.30  
                    Max.   :22.0   Max.   :549920.00   Max.   :41.10  
      Mass            Call       Call.Duration    InterCall.Interval
 Min.   :2.000   Min.   :1.000   Min.   :0.1438   Min.   :0.3346    
 1st Qu.:3.200   1st Qu.:3.000   1st Qu.:0.2040   1st Qu.:0.7102    
 Median :3.400   Median :5.000   Median :0.2232   Median :0.8834    
 Mean   :3.631   Mean   :4.926   Mean   :0.2259   Mean   :0.9204    
 3rd Qu.:3.900   3rd Qu.:7.000   3rd Qu.:0.2490   3rd Qu.:1.0938    
 Max.   :6.500   Max.   :9.000   Max.   :0.3653   Max.   :1.9883    
 Call.Duty.Cycle    Call.Rate       Call.Effort     Call.Dominant.Frequency
 Min.   :0.0960   Min.   :0.4309   Min.   : 371.3   Min.   :1680           
 1st Qu.:0.1709   1st Qu.:0.7545   1st Qu.: 737.0   1st Qu.:2067           
 Median :0.2032   Median :0.9019   Median : 968.7   Median :2196           
 Mean   :0.2082   Mean   :0.9431   Mean   : 973.7   Mean   :2162           
 3rd Qu.:0.2389   3rd Qu.:1.0604   3rd Qu.:1175.7   3rd Qu.:2282           
 Max.   :0.3454   Max.   :2.0574   Max.   :1975.1   Max.   :2730           
 Total.N.Pulses  P1.Peak.Pulse.Rate
 Min.   :10.00   Min.   :  8.51    
 1st Qu.:15.00   1st Qu.: 85.82    
 Median :17.00   Median : 92.08    
 Mean   :17.25   Mean   : 94.67    
 3rd Qu.:20.00   3rd Qu.:100.03    
 Max.   :24.00   Max.   :152.21    
summary(anhui_frog_1)
    forg_id          SVL           parasite      calling_rates         f0      
 Min.   : 1.0   Min.   :4.440   Min.   : 0.000   Min.   : 51.0   Min.   :2602  
 1st Qu.:19.5   1st Qu.:4.900   1st Qu.: 0.000   1st Qu.:100.0   1st Qu.:3255  
 Median :38.0   Median :5.160   Median : 1.000   Median :144.0   Median :3598  
 Mean   :38.0   Mean   :5.162   Mean   : 1.933   Mean   :156.9   Mean   :3636  
 3rd Qu.:56.5   3rd Qu.:5.455   3rd Qu.: 3.000   3rd Qu.:202.0   3rd Qu.:3942  
 Max.   :75.0   Max.   :5.820   Max.   :13.000   Max.   :382.0   Max.   :5362  
      SPL             Tem              RH         NLP_content    
 Min.   :65.00   Min.   : 9.50   Min.   :83.00   Min.   : 0.900  
 1st Qu.:71.00   1st Qu.:15.75   1st Qu.:88.70   1st Qu.: 4.150  
 Median :73.00   Median :17.00   Median :92.00   Median : 5.900  
 Mean   :72.84   Mean   :16.79   Mean   :91.31   Mean   : 6.629  
 3rd Qu.:75.00   3rd Qu.:18.70   3rd Qu.:93.90   3rd Qu.: 8.450  
 Max.   :79.00   Max.   :21.60   Max.   :98.90   Max.   :16.700  
 call_duration       X          
 Min.   :  82.0   Mode:logical  
 1st Qu.: 312.5   NA's:75       
 Median : 473.0                 
 Mean   : 535.3                 
 3rd Qu.: 708.0                 
 Max.   :1140.0                 
 Title..The.impact.of.parasitic.infection.on.nonlinear.vocalizations.and.mate.choice.in.two.torrent.frogs
 Length:75                                                                                               
 Class :character                                                                                        
 Mode  :character                                                                                        
                                                                                                         
                                                                                                         
                                                                                                         
     X.1           
 Length:75         
 Class :character  
 Mode  :character  
                   
                   
                   

Test if the BdZE is normally distributed using ggpubr library

## install.packages("ggpubr")
library(ggpubr)
Warning: package 'ggpubr' was built under R version 4.5.2
ggdensity(sfsu_frog$BdZE)

ggqqplot(sfsu_frog$BdZE)

sfsu_frog$log_BdZE <- log(sfsu_frog$BdZE)
ggdensity(sfsu_frog$log_BdZE)
Warning: Removed 70 rows containing non-finite outside the scale range
(`stat_density()`).

ggqqplot(sfsu_frog$log_BdZE)
Warning: Removed 70 rows containing non-finite outside the scale range
(`stat_qq()`).
Warning: Removed 70 rows containing non-finite outside the scale range
(`stat_qq_line()`).
Removed 70 rows containing non-finite outside the scale range
(`stat_qq_line()`).

Test if parasite count is normally distributed

ggdensity(anhui_frog_1$parasite)

ggqqplot(anhui_frog_1$parasite)

plotting the relationship between SVL and BdZe in the sfsu study

ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = BdZE)) + 
        geom_point() + geom_smooth(method = "lm") 
`geom_smooth()` using formula = 'y ~ x'

Try to log transform it and then plot to see if they correlate

ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = log_BdZE)) +
         geom_point() + geom_smooth(method = "lm") 
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 70 rows containing non-finite outside the scale range
(`stat_smooth()`).

log transforming the SVL as well to see the correlation

sfsu_frog$log_SVL <- log(sfsu_frog$SVL)
ggplot(data = sfsu_frog,
       mapping = aes(x = log_SVL, y = log_BdZE)) +
         geom_point() + geom_smooth(method = "lm") 
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 70 rows containing non-finite outside the scale range
(`stat_smooth()`).

Plotting SVL vs parasite count in the Anhui Study

ggplot(data = anhui_frog_1,
       mapping = aes(x = SVL, y = parasite)) +
         geom_point() + geom_smooth(method = "lm") 
`geom_smooth()` using formula = 'y ~ x'

As shown above, there is a positive correlation between SVL and parasite count in the Anhui Study, meaning in that specific sample, the larger the size of the frog, the more parasites it has.

Plotting the relation between SVL and Call Duration

sfsu1 <- ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = Call.Duration)) +
         geom_point() + geom_smooth(method = "lm") 
sfsu1
`geom_smooth()` using formula = 'y ~ x'

As we can see, there is a positive relationship between SVL and Call Duration, meaning larger the frog, longer their call duration in the SFSU sample.

Now, lets see if the same relationship exist in the Anhui study.

anhui1 <- ggplot(data = anhui_frog_1,
       mapping = aes(x = SVL, y = call_duration)) +
         geom_point() + geom_smooth(method = "lm") 

anhui1
`geom_smooth()` using formula = 'y ~ x'

Interestingly, there is not much correlation between SVL and call_duration, a slightly positive one at most. Lets see the other specie they studied.

anhui2 <- ggplot(data = anhui_frog_2,
       mapping = aes(x = SVL, y = call_duration)) +
         geom_point() + geom_smooth(method = "lm") 

anhui2
`geom_smooth()` using formula = 'y ~ x'

For the other species, again, it is a rather flat regression line, however, it slightly trends downward.

Let’s see if SVL correlates with other call attributes.

sfsu2 <- ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = Call.Rate)) +
         geom_point() + geom_smooth(method = "lm") 

sfsu3 <- ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = Call.Effort)) +
         geom_point() + geom_smooth(method = "lm")

sfsu4 <- ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = Call.Duty.Cycle )) +
         geom_point() + geom_smooth(method = "lm")

sfsu5 <- ggplot(data = sfsu_frog,
       mapping = aes(x = SVL, y = InterCall.Interval )) +
         geom_point() + geom_smooth(method = "lm")

sfsu2 + sfsu3 + sfsu4 + sfsu5
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

As we can see, only call duty cycle has an obvious correlation with SVL. Call rates and call effort has a flat or slightly upward regression line, while Intercall Interval has a slightly downward trend line.