So in summary:
DATA = all 30 calls for all males, full set of data (1886 observations)
Tdat = the temperature corrected, all 30 calls (1886 observations)
means = the temperature corrected means for each male (62 observations)
PCA = the means for the variables of interest, one for each male with the ID as the row name (not an actual variable), prepped for input into PCA observations (62 observations)
full.data = means + PC scores for each individual (62 observations)
asprcomp = results of the PCA, run with the 62 observations with the individuals
# for importing Raven selection files
library(Rraven)
# for graphs
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# for importing files
library(readr)
library(readxl)
# for linear regressions
library (lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: survival
# for PCA
library (vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1
library('factoextra')
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# for graphs
library(extrafont)
## Registering fonts with R
# function for calculating coefficient of variation
cv <- function(x) (sd(x)/mean(x))
# function that gets mode of a character value (e.g. which word is most common)
mode_char <- function(x) unique(x)[which.max(tabulate(match(x, unique(x))))]
DATA <- read_csv("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/080724_data.csv")
## Rows: 1886 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Begin File, ID, Date.x, selec.file, Location.x, Date.y, Location.y...
## dbl (21): Begin Time (s), End Time (s), Delta Time (s), Peak Freq (Hz), Freq...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DATA <- as.data.frame(DATA)
# Remember - if you change the name of the column for body temperature, you will need to edit the name within m & the Tcorrected_matrix to accurately reflect the new column name
Tcorr <- function(col_numbers, target_temp, data){
num_cols <- length(col_numbers)
Tcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
for(i in seq_along(col_numbers)){
m <- lm(data[,col_numbers[i]] ~ data$`Body Temp`, data)$coefficients[2] # Calculate m by doing a linear regression
Tcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (data$`Body Temp` - target_temp) # Save a vector so I can use it outside the function
}
avg <- apply(Tcorrected_matrix, 1, mean)
stdev <- apply(Tcorrected_matrix, 1, sd)
colnames(Tcorrected_matrix) <- colnames(data[, col_numbers])
result <- list(avg = avg, stdev = stdev, Tcorrected_matrix = Tcorrected_matrix)
return(result)
}
# Set Values that you want to temperature correct for
# 3 = Delta Time
# 4 = Peak Freq
# 5 = Freq 5%
# 6 = Freq 95%
# 7 = BW 90%
# 13 = ICI
# 14 = Call Period
# 15 = Call Duration
selections <- c(3:7,13:15)
# Do the temperature correction to the target temp, aka 14
result <- Tcorr(selections, target_temp = 14, data = DATA)
# Format results as a dataframe
Tdat <- as.data.frame(result$Tcorrected_matrix)
# Add information such as ID, Location, and Prevalence
Tdat<-cbind(DATA[,c(9,10,12,18,23,31,32)], Tdat)
# 9 = ID
# 10 = Date
# 12 = Location
# 18 = Body Temp
# 23 = Frog Weight (g)
# 31 = Log Infection
# 32 = Prevalence
# write.csv(Tdat,"080724_temp_corrected_data.csv",row.names=FALSE)
Bcorr <- function(col_numbers, target_svl, data){
num_cols <- length(col_numbers)
Bcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
for(i in seq_along(col_numbers)){
m <- lm(data[,col_numbers[i]] ~ DATA$SVL, data)$coefficients[2] # Calculate m by doing a linear regression
Bcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (DATA$SVL - target_svl) # Save a vector so I can use it outside the function
}
avg <- apply(Bcorrected_matrix, 1, mean)
stdev <- apply(Bcorrected_matrix, 1, sd)
colnames(Bcorrected_matrix) <- colnames(data[, col_numbers])
result <- list(avg = avg, stdev = stdev, Bcorrected_matrix = Bcorrected_matrix)
return(result)
}
# Do the correction to the target size, aka 27 SVL
result <- Bcorr(col_numbers = c(9:12), target_svl = 27, data = Tdat)
# Format results as a dataframe
Test <- as.data.frame(result$Bcorrected_matrix)
Tdat <- subset(Tdat, select = -c(9:12))
# Add information such as ID, Location, and Prevalence
Tdat<-cbind(Tdat,Test)
write.csv(Tdat,"080724_temp_body_corrected_data.csv",row.names=FALSE)
# summarize means of these parameters by ID
means <- Tdat %>%
group_by(ID, Location.x, Prevalence, `Frog Weight (g)`) %>%
summarize(Temp = mean(`Body Temp`),
Infection = mean(`Log Infection`),
Call_Rate_Inst = mean(Call_Rate_Inst),
Duration = mean(`Delta Time (s)`),
ICI = mean(ICI),
Period = mean(Call_Period),
Freq = mean(`Peak Freq (Hz)`),
Freq_5 = mean(`Freq 5% (Hz)`),
Freq_95 = mean(`Freq 95% (Hz)`),
BW = mean(`BW 90% (Hz)`))
## `summarise()` has grouped output by 'ID', 'Location.x', 'Prevalence'. You can
## override using the `.groups` argument.
means <- ungroup(means)
FR <- means %>% filter(Location.x=='FR')
CV <- means %>% filter(Location.x=='CV')
t.test(FR$Infection, CV$Infection)
##
## Welch Two Sample t-test
##
## data: FR$Infection and CV$Infection
## t = -0.60249, df = 59.999, p-value = 0.5491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7500039 0.4027840
## sample estimates:
## mean of x mean of y
## 1.138851 1.312461
t.test(FR$Call_Rate_Inst, CV$Call_Rate_Inst)
##
## Welch Two Sample t-test
##
## data: FR$Call_Rate_Inst and CV$Call_Rate_Inst
## t = -1.9423, df = 55.051, p-value = 0.05723
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.0872147 0.2203254
## sample estimates:
## mean of x mean of y
## 74.68630 81.61975
t.test(FR$Duration, CV$Duration)
##
## Welch Two Sample t-test
##
## data: FR$Duration and CV$Duration
## t = 1.7322, df = 56.067, p-value = 0.08873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001373744 0.018938119
## sample estimates:
## mean of x mean of y
## 0.1127904 0.1040082
t.test(FR$ICI, CV$ICI)
##
## Welch Two Sample t-test
##
## data: FR$ICI and CV$ICI
## t = 0.054917, df = 46.103, p-value = 0.9564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1974357 0.2085117
## sample estimates:
## mean of x mean of y
## 0.8597254 0.8541874
t.test(FR$Period, CV$Period)
##
## Welch Two Sample t-test
##
## data: FR$Period and CV$Period
## t = 0.14294, df = 46.017, p-value = 0.887
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1873376 0.2159779
## sample estimates:
## mean of x mean of y
## 0.9725158 0.9581956
t.test(FR$Freq, CV$Freq)
##
## Welch Two Sample t-test
##
## data: FR$Freq and CV$Freq
## t = 1.0152, df = 49.687, p-value = 0.3149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -28.79341 87.63243
## sample estimates:
## mean of x mean of y
## 2943.453 2914.033
t.test(FR$Freq_5, CV$Freq_5)
##
## Welch Two Sample t-test
##
## data: FR$Freq_5 and CV$Freq_5
## t = 0.35236, df = 54.354, p-value = 0.7259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -50.56418 72.13100
## sample estimates:
## mean of x mean of y
## 2804.422 2793.638
t.test(FR$Freq_95, CV$Freq_95)
##
## Welch Two Sample t-test
##
## data: FR$Freq_95 and CV$Freq_95
## t = 1.1473, df = 52.172, p-value = 0.2565
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -26.20089 96.17296
## sample estimates:
## mean of x mean of y
## 3072.145 3037.159
t.test(FR$BW, CV$BW)
##
## Welch Two Sample t-test
##
## data: FR$BW and CV$BW
## t = 1.9928, df = 58.002, p-value = 0.05099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1080514 48.5132778
## sample estimates:
## mean of x mean of y
## 267.7228 243.5202
??select
PCA <- means %>% dplyr::select(c('Call_Rate_Inst','Infection','Duration','ICI','Period','Freq','Freq_5','Freq_95','BW'))
PCA <- as.data.frame(PCA)
rownames(PCA) <- c(means$ID)
PCA.full <- Tdat %>% dplyr::select(c('Call_Rate_Inst','Log Infection','Delta Time (s)','ICI','Call_Period','Peak Freq (Hz)','Freq 5% (Hz)','Freq 95% (Hz)','BW 90% (Hz)'))
PCA.full <- as.data.frame(PCA.full)
cormat <- cor(Tdat[ , c(8:15)])
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
melted_cormat <- melt(cormat)
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
upper_cormat <- melt(upper_tri, na.rm = TRUE)
ggplot(data = upper_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()+
geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)
# results
# SPECTRAL: peak freq is highly correlated with freq 5% and freq 90%
# TEMPORAL: call period and ICI are almost perfectly correlated lol. both also negatively correlated with call rate (makes sense)
cor.test(Tdat$`Frog Weight (g)`, Tdat$`Freq 5% (Hz)`, method = c("pearson"))
##
## Pearson's product-moment correlation
##
## data: Tdat$`Frog Weight (g)` and Tdat$`Freq 5% (Hz)`
## t = -7.2475, df = 1884, p-value = 6.164e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2082820 -0.1204528
## sample estimates:
## cor
## -0.1646938
asprcomp <- prcomp(PCA, scale = TRUE) # regular R version
asprcomp
## Standard deviations (1, .., p=9):
## [1] 1.763058e+00 1.559440e+00 1.296917e+00 9.717221e-01 7.150851e-01
## [6] 5.115198e-01 2.460488e-01 2.139402e-06 1.254242e-15
##
## Rotation (n x k) = (9 x 9):
## PC1 PC2 PC3 PC4 PC5
## Call_Rate_Inst -0.21196158 0.48203170 -0.19657839 0.14738770 -0.383514498
## Infection -0.04736242 0.03633548 -0.27081295 -0.95700028 -0.026166299
## Duration -0.19785210 -0.14956536 0.58798983 -0.14023616 0.545198921
## ICI 0.29282582 -0.52013272 -0.14173100 0.03266812 -0.195102593
## Period 0.28469616 -0.53099854 -0.11293316 0.02579258 -0.168805929
## Freq 0.50183198 0.24232609 0.12477816 -0.08297010 0.007903299
## Freq_5 0.50345032 0.26274982 -0.04416357 0.01377631 0.243619142
## Freq_95 0.49107069 0.25129860 0.22199842 -0.05641808 -0.019896136
## BW -0.02089951 -0.02339827 0.66531222 -0.17541911 -0.654691548
## PC6 PC7 PC8 PC9
## Call_Rate_Inst -0.71697287 -0.034900617 -4.496259e-07 1.552163e-16
## Infection -0.06790084 -0.044183127 -8.850668e-08 -2.553097e-17
## Duration -0.52397894 0.004974373 -1.877855e-07 -3.558077e-02
## ICI -0.28501283 -0.026962646 -1.679128e-07 -7.088976e-01
## Period -0.31329400 -0.026883025 -1.784455e-07 7.044134e-01
## Freq -0.08049362 0.812679162 2.275804e-06 3.355827e-17
## Freq_5 -0.06856501 -0.390200525 -6.775339e-01 1.022936e-11
## Freq_95 -0.01762445 -0.419569904 6.829985e-01 -1.031159e-11
## BW 0.12613089 -0.081317883 -2.728752e-01 4.119601e-12
summary(asprcomp)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7631 1.5594 1.2969 0.9717 0.71509 0.51152 0.24605
## Proportion of Variance 0.3454 0.2702 0.1869 0.1049 0.05682 0.02907 0.00673
## Cumulative Proportion 0.3454 0.6156 0.8025 0.9074 0.96420 0.99327 1.00000
## PC8 PC9
## Standard deviation 2.139e-06 1.254e-15
## Proportion of Variance 0.000e+00 0.000e+00
## Cumulative Proportion 1.000e+00 1.000e+00
get_eigenvalue(asprcomp)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.108373e+00 3.453747e+01 34.53747
## Dim.2 2.431852e+00 2.702057e+01 61.55805
## Dim.3 1.681993e+00 1.868881e+01 80.24686
## Dim.4 9.442438e-01 1.049160e+01 90.73845
## Dim.5 5.113467e-01 5.681630e+00 96.42008
## Dim.6 2.616525e-01 2.907250e+00 99.32733
## Dim.7 6.054002e-02 6.726668e-01 100.00000
## Dim.8 4.577043e-12 5.085603e-11 100.00000
## Dim.9 1.573122e-30 1.747913e-29 100.00000
# PC1 = Freq (+), Freq_5 (+), Freq_95(+) (34.54%)
# PC2 = Call_Rate Inst (+), ICI (-), Period (-) (61.56%)
# PC3 = Duration (+), BW (+) (80.24%)
# PC4 = Infection (-) (90.73%)
scores = scores(asprcomp)
fviz_pca_var(asprcomp,axes = c(4, 1))
fviz_pca_var(asprcomp,axes = c(4, 2))
fviz_pca_var(asprcomp,axes = c(4, 3))
# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs <- as.data.frame(scores) # same as scores
# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(means, pcs)
# install.packages("pracma")
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:vegan':
##
## procrustes
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:Matrix':
##
## expm, lu, tril, triu
## The following object is masked from 'package:purrr':
##
## cross
asprcomp <- prcomp(PCA, scale = TRUE)
rawLoadings <- asprcomp$rotation[,1:3] %*% diag(asprcomp$sdev, 3, 3)
rotatedLoadings <- varimax(rawLoadings)$loadings
invLoadings <- t(pracma::pinv(rotatedLoadings))
scores2 <- scale(PCA) %*% invLoadings
# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs2 <- as.data.frame(scores2) # same as scores
# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(full.data, pcs2)
# install.packages("devtools")
library(devtools)
## Loading required package: usethis
##
## Attaching package: 'devtools'
## The following object is masked from 'package:permute':
##
## check
# devtools::install_github("arleyc/PCAtest")
library(PCAtest)
result<-PCAtest(PCA, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE)
##
## Sampling bootstrap replicates... Please wait
##
## Calculating confidence intervals of empirical statistics... Please wait
##
## Sampling random permutations... Please wait
##
## Comparing empirical statistics with their null distributions... Please wait
##
## ========================================================
## Test of PCA significance: 9 variables, 62 observations
## 100 bootstrap replicates, 100 random permutations
## ========================================================
##
## Empirical Psi = 10.6302, Max null Psi = 2.3947, Min null Psi = 0.6731, p-value = 0
## Empirical Phi = 0.3842, Max null Phi = 0.1824, Min null Phi = 0.0967, p-value = 0
##
## Empirical eigenvalue #1 = 3.10837, Max null eigenvalue = 2.06183, p-value = 0
## Empirical eigenvalue #2 = 2.43185, Max null eigenvalue = 1.64653, p-value = 0
## Empirical eigenvalue #3 = 1.68199, Max null eigenvalue = 1.40277, p-value = 0
## Empirical eigenvalue #4 = 0.94424, Max null eigenvalue = 1.25707, p-value = 1
## Empirical eigenvalue #5 = 0.51135, Max null eigenvalue = 1.07389, p-value = 1
## Empirical eigenvalue #6 = 0.26165, Max null eigenvalue = 0.98487, p-value = 1
## Empirical eigenvalue #7 = 0.06054, Max null eigenvalue = 0.84442, p-value = 1
## Empirical eigenvalue #8 = 0, Max null eigenvalue = 0.73536, p-value = 1
## Empirical eigenvalue #9 = 0, Max null eigenvalue = 0.59923, p-value = 1
##
## PC 1 is significant and accounts for 34.5% (95%-CI:32.9-41.6) of the total variation
## PC 2 is significant and accounts for 27% (95%-CI:21.4-32.2) of the total variation
## PC 3 is significant and accounts for 18.7% (95%-CI:14.2-20.8) of the total variation
##
## The first 3 PC axes are significant and account for 80.2% of the total variation
##
## Variables 4, 5, 6, 7, and 8 have significant loadings on PC 1
## Variables 1, 4, and 5 have significant loadings on PC 2
## Variables 3, and 9 have significant loadings on PC 3
ggplot(data = full.data, aes(x = Infection, y = PC1))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC1 (Spectral Components)",
title = "Fungal Load x PC1")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = V1))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC1 (Spectral Components)",
title = "Fungal Load x V1")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
hist(full.data$V1)
fit.norm <- fitdist(full.data$V1, "norm")
plot(fit.norm)
descdist(full.data$V1, discrete = FALSE, boot = 500)
## summary statistics
## ------
## min: -2.673025 max: 2.073538
## median: -0.1302353
## mean: -3.434752e-16
## estimated sd: 1
## estimated skewness: -0.1994627
## estimated kurtosis: 3.668347
# histogram looks normal, so use a normal regression with location as random variable
# first - regression with numerical variable load
lmodel_V1_load <- lm(V1~Infection, full.data)
summary(lmodel_V1_load)
##
## Call:
## lm(formula = V1 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.64519 -0.48138 -0.03113 0.65669 2.13941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10189 0.18911 0.539 0.592
## Infection -0.08295 0.11370 -0.730 0.469
##
## Residual standard error: 1.004 on 60 degrees of freedom
## Multiple R-squared: 0.008792, Adjusted R-squared: -0.007728
## F-statistic: 0.5322 on 1 and 60 DF, p-value: 0.4685
# second - regression with categorical variable prevalence
lmodel_V1_prevalence <-lm(V1~Prevalence, full.data)
summary(lmodel_V1_prevalence)
##
## Call:
## lm(formula = V1 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.62271 -0.50701 -0.08252 0.64953 2.12385
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1230 0.2369 0.519 0.606
## PrevalencePresent -0.1733 0.2812 -0.616 0.540
##
## Residual standard error: 1.005 on 60 degrees of freedom
## Multiple R-squared: 0.00629, Adjusted R-squared: -0.01027
## F-statistic: 0.3798 on 1 and 60 DF, p-value: 0.5401
ggplot(data = full.data, aes(x = Infection, y = PC2))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC2 (Temporal Components)",
title = "Fungal Load x PC2")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = V2))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC2 (Temporal Components)",
title = "Fungal Load x PC2")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
hist(scores2[, 2])
# first - regression with numerical variable load
lmodel_V2_load <- lm(V2~Infection, full.data)
summary(lmodel_V2_load)
##
## Call:
## lm(formula = V2 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7244 -0.2441 0.0942 0.5079 1.2516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06572 0.18960 -0.347 0.730
## Infection 0.05350 0.11399 0.469 0.641
##
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared: 0.003658, Adjusted R-squared: -0.01295
## F-statistic: 0.2203 on 1 and 60 DF, p-value: 0.6405
# second - regression with categorical variable prevalence
lmodel_V2_prevalence <-lm(V2~Prevalence, full.data)
summary(lmodel_V2_prevalence)
##
## Call:
## lm(formula = V2 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6807 -0.2164 0.1115 0.5308 1.2678
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1034 0.2371 0.436 0.664
## PrevalencePresent -0.1458 0.2815 -0.518 0.606
##
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared: 0.00445, Adjusted R-squared: -0.01214
## F-statistic: 0.2682 on 1 and 60 DF, p-value: 0.6065
ggplot(data = full.data, aes(x = Infection, y = PC3))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC3 (Bandwidth & Duration Components)",
title = "Fungal Load x PC3")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = V3))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC3 (Bandwidth & Duration Components)",
title = "Fungal Load x PC3")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = PC3, color = Prevalence))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = FALSE)+
labs(x = "(Log) Fungal Load",
y = "PC3 (Bandwidth & Duration Components)",
title = "Fungal Load x PC3")
## `geom_smooth()` using formula = 'y ~ x'
hist(scores2[, 3])
# first - regression with numerical variable load
lmodel_V3_load <- lm(V3~Infection, full.data)
summary(lmodel_V3_load)
##
## Call:
## lm(formula = V3 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.18049 -0.65659 0.06561 0.69576 1.94017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3781 0.1781 2.124 0.03784 *
## Infection -0.3078 0.1071 -2.875 0.00558 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9453 on 60 degrees of freedom
## Multiple R-squared: 0.1211, Adjusted R-squared: 0.1064
## F-statistic: 8.266 on 1 and 60 DF, p-value: 0.005581
# second - regression with categorical variable prevalence
lmodel_PC3_prevalence <-lm(PC3~Prevalence,full.data)
plot(lmodel_PC3_prevalence)
summary(lmodel_PC3_prevalence)
##
## Call:
## lm(formula = PC3 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.98405 -0.87510 0.03902 0.92960 2.74680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6542 0.2915 2.245 0.02849 *
## PrevalencePresent -0.9218 0.3460 -2.664 0.00989 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 60 degrees of freedom
## Multiple R-squared: 0.1058, Adjusted R-squared: 0.0909
## F-statistic: 7.099 on 1 and 60 DF, p-value: 0.00989
ggplot(full.data,aes(x=Prevalence,y=V3))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(position=position_jitter(0.1))+
theme_classic()
ggplot(full.data,aes(x=Prevalence,y=V3))+
facet_wrap(~Location.x,strip.position="bottom")+
geom_boxplot(outlier.shape = NA)+
geom_jitter(position=position_jitter(0.1))+
theme_classic()
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
geom_point()+
theme_classic()+
theme(legend.position = "none")
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
theme_classic()+
theme(legend.position = "none")+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, group = ID, color = Location.x))+
theme_classic()+
theme(legend.position = "none")+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = V3, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = Duration))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = Duration, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = BW))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = BW, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data, aes(x = BW, y = Duration, color = Infection))+
geom_point()+
theme_classic()+
scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')
ggplot(full.data, aes(x = Infection, y = Duration))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
ggplot(full.data, aes(x = PC1, y = Duration))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
ggplot(full.data, aes(x = PC1, y = BW))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
# lm with duration
dur.load <- lm(Duration~Infection, full.data)
dur.prev <- lm(Duration~Prevalence, full.data)
dur.load.location <- lm(Duration~Infection*Location.x, full.data)
dur.prev.location <- lm(Duration~Prevalence*Location.x, full.data)
summary(dur.load)
##
## Call:
## lm(formula = Duration ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.037983 -0.015713 0.000237 0.014621 0.053128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.110955 0.003798 29.216 <2e-16 ***
## Infection -0.002196 0.002283 -0.962 0.34
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02016 on 60 degrees of freedom
## Multiple R-squared: 0.01518, Adjusted R-squared: -0.001236
## F-statistic: 0.9247 on 1 and 60 DF, p-value: 0.3401
summary(dur.prev)
##
## Call:
## lm(formula = Duration ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.038416 -0.015547 -0.000023 0.014501 0.051607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.111718 0.004759 23.476 <2e-16 ***
## PrevalencePresent -0.004875 0.005649 -0.863 0.392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02019 on 60 degrees of freedom
## Multiple R-squared: 0.01226, Adjusted R-squared: -0.0042
## F-statistic: 0.7449 on 1 and 60 DF, p-value: 0.3915
summary(dur.load.location)
##
## Call:
## lm(formula = Duration ~ Infection * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.040505 -0.016103 -0.001656 0.013873 0.051570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.041e-01 5.325e-03 19.553 <2e-16 ***
## Infection -7.977e-05 3.046e-03 -0.026 0.9792
## Location.xFR 1.340e-02 7.504e-03 1.785 0.0795 .
## Infection:Location.xFR -4.064e-03 4.545e-03 -0.894 0.3750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared: 0.07236, Adjusted R-squared: 0.02438
## F-statistic: 1.508 on 3 and 58 DF, p-value: 0.222
summary(dur.prev.location)
##
## Call:
## lm(formula = Duration ~ Prevalence * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.041425 -0.015915 -0.001479 0.014708 0.048599
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1037859 0.0066324 15.648 <2e-16 ***
## PrevalencePresent 0.0003093 0.0078231 0.040 0.9686
## Location.xFR 0.0158635 0.0093796 1.691 0.0962 .
## PrevalencePresent:Location.xFR -0.0101078 0.0111374 -0.908 0.3679
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared: 0.07268, Adjusted R-squared: 0.02472
## F-statistic: 1.515 on 3 and 58 DF, p-value: 0.2201
# lm with bandwidth
bw.load <- lm(BW~Infection, full.data)
bw.prev <- lm(BW~Prevalence, full.data)
bw.load.location <- lm(BW~Infection*Location.x, full.data)
bw.prev.location <- lm(BW~Prevalence*Location.x, full.data)
summary(bw.load)
##
## Call:
## lm(formula = BW ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -103.043 -28.328 5.809 26.109 93.602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 262.479 9.174 28.61 <2e-16 ***
## Infection -5.900 5.516 -1.07 0.289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.7 on 60 degrees of freedom
## Multiple R-squared: 0.01871, Adjusted R-squared: 0.002357
## F-statistic: 1.144 on 1 and 60 DF, p-value: 0.2891
summary(bw.prev)
##
## Call:
## lm(formula = BW ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.376 -32.640 1.494 27.373 104.643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 269.29 11.39 23.651 <2e-16 ***
## PrevalencePresent -19.81 13.52 -1.466 0.148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.31 on 60 degrees of freedom
## Multiple R-squared: 0.03456, Adjusted R-squared: 0.01847
## F-statistic: 2.148 on 1 and 60 DF, p-value: 0.148
summary(bw.load.location)
##
## Call:
## lm(formula = BW ~ Infection * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -112.643 -34.408 1.241 29.110 83.728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 252.116 12.848 19.622 <2e-16 ***
## Infection -6.549 7.349 -0.891 0.376
## Location.xFR 19.379 18.108 1.070 0.289
## Infection:Location.xFR 3.237 10.967 0.295 0.769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.02 on 58 degrees of freedom
## Multiple R-squared: 0.0778, Adjusted R-squared: 0.0301
## F-statistic: 1.631 on 3 and 58 DF, p-value: 0.192
summary(bw.prev.location)
##
## Call:
## lm(formula = BW ~ Prevalence * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -104.008 -34.234 -2.317 28.765 92.011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 257.764 15.854 16.258 <2e-16 ***
## PrevalencePresent -19.817 18.701 -1.060 0.294
## Location.xFR 23.050 22.421 1.028 0.308
## PrevalencePresent:Location.xFR 1.116 26.623 0.042 0.967
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.56 on 58 degrees of freedom
## Multiple R-squared: 0.09525, Adjusted R-squared: 0.04846
## F-statistic: 2.035 on 3 and 58 DF, p-value: 0.1189
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`
DATA %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
sd = ~sd(., na.rm = T)
)))
## Body Temp_mean Body Temp_sd SVL_mean SVL_sd Tibia_mean Tibia_sd
## 1 15.09512 3.437289 27.52057 1.784544 13.92479 0.9396512
## Frog Weight (g)_mean Frog Weight (g)_sd
## 1 1.522853 0.3224575
# Body Temp
# Frog Weight (g)
# SVL
# Tibia