So in summary:
DATA = all 30 calls for all males, full set of data (1886 observations)
Tdat = the temperature corrected, all 30 calls (1886 observations)
means = the temperature corrected means for each male (62 observations)
PCA = the means for the variables of interest, one for each male with the ID as the row name (not an actual variable), prepped for input into PCA observations (62 observations)
full.data = means + PC scores for each individual (62 observations)
asprcomp = results of the PCA, run with the 62 observations with the individuals
# for importing Raven selection files
library(Rraven)
# for graphs
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# for importing files
library(readr)
library(readxl)
# for linear regressions
library (lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(fitdistrplus)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: survival
# for PCA
library (vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1
library('factoextra')
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# for graphs
library(extrafont)
## Registering fonts with R
# function for calculating coefficient of variation
cv <- function(x) (sd(x)/mean(x))
# function that gets mode of a character value (e.g. which word is most common)
mode_char <- function(x) unique(x)[which.max(tabulate(match(x, unique(x))))]
DATA <- read_csv("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/080724_data.csv")
## Rows: 1886 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Begin File, ID, Date.x, selec.file, Location.x, Date.y, Location.y...
## dbl (21): Begin Time (s), End Time (s), Delta Time (s), Peak Freq (Hz), Freq...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DATA <- as.data.frame(DATA)
# Remember - if you change the name of the column for body temperature, you will need to edit the name within m & the Tcorrected_matrix to accurately reflect the new column name
Tcorr <- function(col_numbers, target_temp, data){
num_cols <- length(col_numbers)
Tcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
for(i in seq_along(col_numbers)){
m <- lm(data[,col_numbers[i]] ~ data$`Body Temp`, data)$coefficients[2] # Calculate m by doing a linear regression
Tcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (data$`Body Temp` - target_temp) # Save a vector so I can use it outside the function
}
avg <- apply(Tcorrected_matrix, 1, mean)
stdev <- apply(Tcorrected_matrix, 1, sd)
colnames(Tcorrected_matrix) <- colnames(data[, col_numbers])
result <- list(avg = avg, stdev = stdev, Tcorrected_matrix = Tcorrected_matrix)
return(result)
}
# Set Values that you want to temperature correct for
# 3 = Delta Time
# 4 = Peak Freq
# 5 = Freq 5%
# 6 = Freq 95%
# 7 = BW 90%
# 13 = ICI
# 14 = Call Period
# 15 = Call Duration
selections <- c(3:7,13:15)
# Do the temperature correction to the target temp, aka 14
result <- Tcorr(selections, target_temp = 14, data = DATA)
# Format results as a dataframe
Tdat <- as.data.frame(result$Tcorrected_matrix)
# Add information such as ID, Location, and Prevalence
Tdat<-cbind(DATA[,c(9,10,12,18,23,31,32)], Tdat)
# 9 = ID
# 10 = Date
# 12 = Location
# 18 = Body Temp
# 23 = Frog Weight (g)
# 31 = Log Infection
# 32 = Prevalence
# write.csv(Tdat,"080724_temp_corrected_data.csv",row.names=FALSE)
Bcorr <- function(col_numbers, target_svl, data){
num_cols <- length(col_numbers)
Bcorrected_matrix <- matrix(NA, nrow = nrow(data), ncol = num_cols)
for(i in seq_along(col_numbers)){
m <- lm(data[,col_numbers[i]] ~ DATA$SVL, data)$coefficients[2] # Calculate m by doing a linear regression
Bcorrected_matrix[, i] <- data[,col_numbers[i]] - m * (DATA$SVL - target_svl) # Save a vector so I can use it outside the function
}
avg <- apply(Bcorrected_matrix, 1, mean)
stdev <- apply(Bcorrected_matrix, 1, sd)
colnames(Bcorrected_matrix) <- colnames(data[, col_numbers])
result <- list(avg = avg, stdev = stdev, Bcorrected_matrix = Bcorrected_matrix)
return(result)
}
# Do the correction to the target size, aka 27 SVL
result <- Bcorr(col_numbers = c(9:12), target_svl = 27, data = Tdat)
# Format results as a dataframe
Test <- as.data.frame(result$Bcorrected_matrix)
Tdat <- subset(Tdat, select = -c(9:12))
# Add information such as ID, Location, and Prevalence
Tdat<-cbind(Tdat,Test)
write.csv(Tdat,"080724_temp_body_corrected_data.csv",row.names=FALSE)
# summarize means of these parameters by ID
means <- Tdat %>%
group_by(ID, Location.x, Prevalence) %>%
summarize(Temp = mean(`Body Temp`),
Infection = mean(`Log Infection`),
Call_Rate_Inst = mean(Call_Rate_Inst),
Duration = mean(`Delta Time (s)`),
ICI = mean(ICI),
Period = mean(Call_Period),
Freq = mean(`Peak Freq (Hz)`),
Freq_5 = mean(`Freq 5% (Hz)`),
Freq_95 = mean(`Freq 95% (Hz)`),
BW = mean(`BW 90% (Hz)`),
Weight = mean(`Frog Weight (g)`)
)
## `summarise()` has grouped output by 'ID', 'Location.x'. You can override using
## the `.groups` argument.
means <- ungroup(means)
FR <- means %>% filter(Location.x=='FR')
CV <- means %>% filter(Location.x=='CV')
t.test(FR$Infection, CV$Infection)
##
## Welch Two Sample t-test
##
## data: FR$Infection and CV$Infection
## t = -0.60249, df = 59.999, p-value = 0.5491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7500039 0.4027840
## sample estimates:
## mean of x mean of y
## 1.138851 1.312461
t.test(FR$Call_Rate_Inst, CV$Call_Rate_Inst)
##
## Welch Two Sample t-test
##
## data: FR$Call_Rate_Inst and CV$Call_Rate_Inst
## t = -1.9423, df = 55.051, p-value = 0.05723
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.0872147 0.2203254
## sample estimates:
## mean of x mean of y
## 74.68630 81.61975
t.test(FR$Duration, CV$Duration)
##
## Welch Two Sample t-test
##
## data: FR$Duration and CV$Duration
## t = 1.7322, df = 56.067, p-value = 0.08873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001373744 0.018938119
## sample estimates:
## mean of x mean of y
## 0.1127904 0.1040082
t.test(FR$ICI, CV$ICI)
##
## Welch Two Sample t-test
##
## data: FR$ICI and CV$ICI
## t = 0.054917, df = 46.103, p-value = 0.9564
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1974357 0.2085117
## sample estimates:
## mean of x mean of y
## 0.8597254 0.8541874
t.test(FR$Period, CV$Period)
##
## Welch Two Sample t-test
##
## data: FR$Period and CV$Period
## t = 0.14294, df = 46.017, p-value = 0.887
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1873376 0.2159779
## sample estimates:
## mean of x mean of y
## 0.9725158 0.9581956
t.test(FR$Freq, CV$Freq)
##
## Welch Two Sample t-test
##
## data: FR$Freq and CV$Freq
## t = 1.0152, df = 49.687, p-value = 0.3149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -28.79341 87.63243
## sample estimates:
## mean of x mean of y
## 2943.453 2914.033
t.test(FR$Freq_5, CV$Freq_5)
##
## Welch Two Sample t-test
##
## data: FR$Freq_5 and CV$Freq_5
## t = 0.35236, df = 54.354, p-value = 0.7259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -50.56418 72.13100
## sample estimates:
## mean of x mean of y
## 2804.422 2793.638
t.test(FR$Freq_95, CV$Freq_95)
##
## Welch Two Sample t-test
##
## data: FR$Freq_95 and CV$Freq_95
## t = 1.1473, df = 52.172, p-value = 0.2565
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -26.20089 96.17296
## sample estimates:
## mean of x mean of y
## 3072.145 3037.159
t.test(FR$BW, CV$BW)
##
## Welch Two Sample t-test
##
## data: FR$BW and CV$BW
## t = 1.9928, df = 58.002, p-value = 0.05099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1080514 48.5132778
## sample estimates:
## mean of x mean of y
## 267.7228 243.5202
t.test(FR$Weight, CV$Weight)
##
## Welch Two Sample t-test
##
## data: FR$Weight and CV$Weight
## t = 2.0912, df = 59.802, p-value = 0.04076
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.007268515 0.327523151
## sample estimates:
## mean of x mean of y
## 1.603333 1.435938
weight.location.model <- lm(Weight ~ Location.x, means)
summary(weight.location.model)
##
## Call:
## lm(formula = Weight ~ Location.x, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70333 -0.20333 -0.03594 0.10971 0.89667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.43594 0.05570 25.782 <2e-16 ***
## Location.xFR 0.16740 0.08007 2.091 0.0408 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3151 on 60 degrees of freedom
## Multiple R-squared: 0.0679, Adjusted R-squared: 0.05237
## F-statistic: 4.371 on 1 and 60 DF, p-value: 0.0408
ggplot(means, aes(y = Weight, x = Location.x))+
geom_boxplot()+
theme_classic()
DATA %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
sd = ~sd(., na.rm = T)
)))
## Body Temp_mean Body Temp_sd SVL_mean SVL_sd Tibia_mean Tibia_sd
## 1 15.09512 3.437289 27.52057 1.784544 13.92479 0.9396512
## Frog Weight (g)_mean Frog Weight (g)_sd
## 1 1.522853 0.3224575
# Body Temp
# Frog Weight (g)
# SVL
# Tibia
DATA %>% group_by(Prevalence) %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
sd = ~sd(., na.rm = T)
)))
## # A tibble: 2 × 9
## Prevalence `Body Temp_mean` `Body Temp_sd` SVL_mean SVL_sd Tibia_mean Tibia_sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Absent 15.5 3.44 27.5 1.21 14.0 0.854
## 2 Present 14.9 3.42 27.5 1.97 13.9 0.972
## # ℹ 2 more variables: `Frog Weight (g)_mean` <dbl>, `Frog Weight (g)_sd` <dbl>
ggplot(means, aes(y = Weight, x = Prevalence))+
geom_boxplot()+
theme_classic()
weight.prevalence.model <- lm(Weight ~ Prevalence, means)
summary(weight.prevalence.model)
##
## Call:
## lm(formula = Weight ~ Prevalence, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66111 -0.20805 -0.02999 0.20114 1.00114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.56111 0.07662 20.375 <2e-16 ***
## PrevalencePresent -0.06225 0.09095 -0.684 0.496
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3251 on 60 degrees of freedom
## Multiple R-squared: 0.007746, Adjusted R-squared: -0.008791
## F-statistic: 0.4684 on 1 and 60 DF, p-value: 0.4964
ggplot(means, aes(y = Weight, x = Prevalence, color = Prevalence))+
geom_boxplot()+
theme_classic()+
facet_grid(.~Location.x)
weight.prevalence.location.model <- lm(Weight ~ Prevalence * Location.x, means)
summary(weight.prevalence.location.model)
##
## Call:
## lm(formula = Weight ~ Prevalence * Location.x, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70952 -0.19783 -0.06558 0.10217 0.89048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.53333 0.10572 14.504 <2e-16 ***
## PrevalencePresent -0.13551 0.12470 -1.087 0.282
## Location.xFR 0.05556 0.14951 0.372 0.712
## PrevalencePresent:Location.xFR 0.15614 0.17753 0.880 0.383
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3172 on 58 degrees of freedom
## Multiple R-squared: 0.08691, Adjusted R-squared: 0.03968
## F-statistic: 1.84 on 3 and 58 DF, p-value: 0.1499
ggplot(means, aes(y = Weight, x = Infection))+
geom_point()+
theme_classic()+
facet_grid(.~Location.x)+
stat_smooth(method = "lm",
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
weight.load.model <- lm(Weight ~ Infection, means)
summary(weight.load.model)
##
## Call:
## lm(formula = Weight ~ Infection, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64845 -0.22320 -0.03235 0.18170 1.01736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.54845 0.06123 25.290 <2e-16 ***
## Infection -0.02565 0.03681 -0.697 0.489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.325 on 60 degrees of freedom
## Multiple R-squared: 0.008028, Adjusted R-squared: -0.008505
## F-statistic: 0.4856 on 1 and 60 DF, p-value: 0.4886
ggplot(means, aes(y = Weight, x = Infection, color = Location.x))+
geom_point()+
theme_classic()+
stat_smooth(method = "lm",
se = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
weight.load.location.model <- lm(Weight ~ Infection * Location.x, means)
summary(weight.load.location.model)
##
## Call:
## lm(formula = Weight ~ Infection * Location.x, data = means)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7197 -0.2161 -0.0450 0.1148 0.8692
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.50417 0.08481 17.735 <2e-16 ***
## Infection -0.05199 0.04851 -1.072 0.288
## Location.xFR 0.07723 0.11953 0.646 0.521
## Infection:Location.xFR 0.07125 0.07240 0.984 0.329
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.317 on 58 degrees of freedom
## Multiple R-squared: 0.08798, Adjusted R-squared: 0.04081
## F-statistic: 1.865 on 3 and 58 DF, p-value: 0.1456
DATA %>% group_by(Prevalence) %>% summarise(across(c(18,21:23), list(mean = ~mean(., na.rm = T),
sd = ~sd(., na.rm = T)
)))
## # A tibble: 2 × 9
## Prevalence `Body Temp_mean` `Body Temp_sd` SVL_mean SVL_sd Tibia_mean Tibia_sd
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Absent 15.5 3.44 27.5 1.21 14.0 0.854
## 2 Present 14.9 3.42 27.5 1.97 13.9 0.972
## # ℹ 2 more variables: `Frog Weight (g)_mean` <dbl>, `Frog Weight (g)_sd` <dbl>
FR2 <- DATA %>% filter(Location.x == 'FR')
CV2 <- DATA %>% filter(Location.x == 'CV')
t.test(FR2$SVL,CV2$SVL)
##
## Welch Two Sample t-test
##
## data: FR2$SVL and CV2$SVL
## t = -1.1282, df = 1865.6, p-value = 0.2594
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.25171080 0.06786881
## sample estimates:
## mean of x mean of y
## 27.47232 27.56424
t.test(FR2$Tibia,CV2$Tibia)
##
## Welch Two Sample t-test
##
## data: FR2$Tibia and CV2$Tibia
## t = 2.0209, df = 1815.2, p-value = 0.04343
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.002594924 0.173219470
## sample estimates:
## mean of x mean of y
## 13.97094 13.88303
ggplot(DATA, aes(y = Tibia, x = Location.x))+
geom_boxplot()+
theme_classic()
ggplot(DATA, aes(y = SVL, x = Location.x))+
geom_boxplot()+
theme_classic()
cormat <- cor(Tdat[ , c(8:15)])
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
melted_cormat <- melt(cormat)
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
upper_cormat <- melt(upper_tri, na.rm = TRUE)
ggplot(data = upper_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()+
geom_text(aes(Var2, Var1, label = round(value,4)), color = "black", size = 3)
# results
# SPECTRAL: peak freq is highly correlated with freq 5% and freq 90%
# TEMPORAL: call period and ICI are almost perfectly correlated lol. both also negatively correlated with call rate (makes sense)
cor.test(Tdat$`Frog Weight (g)`, Tdat$`Freq 5% (Hz)`, method = c("pearson"))
##
## Pearson's product-moment correlation
##
## data: Tdat$`Frog Weight (g)` and Tdat$`Freq 5% (Hz)`
## t = -7.2475, df = 1884, p-value = 6.164e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2082820 -0.1204528
## sample estimates:
## cor
## -0.1646938
PCA <- means %>% dplyr::select(c('Call_Rate_Inst','Duration','ICI','Period','Freq','Freq_5','Freq_95','BW'))
PCA <- as.data.frame(PCA)
rownames(PCA) <- c(means$ID)
PCA.full <- Tdat %>% dplyr::select(c('Call_Rate_Inst','Delta Time (s)','ICI','Call_Period','Peak Freq (Hz)','Freq 5% (Hz)','Freq 95% (Hz)','BW 90% (Hz)'))
PCA.full <- as.data.frame(PCA.full)
asprcomp <- prcomp(PCA, scale = TRUE) # regular R version
asprcomp
## Standard deviations (1, .., p=8):
## [1] 1.761732e+00 1.558847e+00 1.275686e+00 7.153068e-01 5.147411e-01
## [6] 2.495981e-01 2.141119e-06 1.243539e-15
##
## Rotation (n x k) = (8 x 8):
## PC1 PC2 PC3 PC4 PC5
## Call_Rate_Inst -0.20977688 -0.48220768 -0.22590419 -0.38901620 0.72129635
## Duration -0.20264941 0.14297984 0.60652038 0.54539909 0.52127578
## ICI 0.29185509 0.52295333 -0.13509349 -0.19666278 0.28516979
## Period 0.28347693 0.53350446 -0.10531740 -0.17036594 0.31331541
## Freq 0.50284691 -0.24059995 0.13990203 0.01041891 0.06749534
## Freq_5 0.50497675 -0.26010433 -0.04372966 0.24254392 0.07447527
## Freq_95 0.49080429 -0.25117525 0.23234676 -0.01960569 0.02027577
## BW -0.02535633 0.01713845 0.69013654 -0.65129482 -0.13416958
## PC6 PC7 PC8
## Call_Rate_Inst -0.02889076 -4.393422e-07 2.920178e-16
## Duration 0.01297750 -1.751072e-07 -3.558077e-02
## ICI -0.02358669 -1.611560e-07 -7.088976e-01
## Period -0.02308133 -1.710518e-07 7.044134e-01
## Freq 0.81548752 2.233240e-06 2.354059e-16
## Freq_5 -0.38987902 -6.775339e-01 -1.195418e-11
## Freq_95 -0.41803672 6.829985e-01 1.205034e-11
## BW -0.07827885 -2.728752e-01 -4.814578e-12
summary(asprcomp)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.762 1.5588 1.2757 0.71531 0.51474 0.24960 2.141e-06
## Proportion of Variance 0.388 0.3038 0.2034 0.06396 0.03312 0.00779 0.000e+00
## Cumulative Proportion 0.388 0.6917 0.8951 0.95909 0.99221 1.00000 1.000e+00
## PC8
## Standard deviation 1.244e-15
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
get_eigenvalue(asprcomp)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.103701e+00 3.879627e+01 38.79627
## Dim.2 2.430004e+00 3.037505e+01 69.17131
## Dim.3 1.627374e+00 2.034217e+01 89.51348
## Dim.4 5.116639e-01 6.395799e+00 95.90928
## Dim.5 2.649584e-01 3.311980e+00 99.22126
## Dim.6 6.229923e-02 7.787404e-01 100.00000
## Dim.7 4.584391e-12 5.730489e-11 100.00000
## Dim.8 1.546389e-30 1.932987e-29 100.00000
# PC1 = Freq (+), Freq_5 (+), Freq_95(+) (38.8%)
# PC2 = Call_Rate Inst (+), ICI (-), Period (-) (69.17%)
# PC3 = Duration (+), BW (+) (89.51%)
# install.packages("devtools")
library(devtools)
## Loading required package: usethis
##
## Attaching package: 'devtools'
## The following object is masked from 'package:permute':
##
## check
# devtools::install_github("arleyc/PCAtest")
library(PCAtest)
result<-PCAtest(PCA, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE)
##
## Sampling bootstrap replicates... Please wait
##
## Calculating confidence intervals of empirical statistics... Please wait
##
## Sampling random permutations... Please wait
##
## Comparing empirical statistics with their null distributions... Please wait
##
## ========================================================
## Test of PCA significance: 8 variables, 62 observations
## 100 bootstrap replicates, 100 random permutations
## ========================================================
##
## Empirical Psi = 10.5221, Max null Psi = 1.9740, Min null Psi = 0.4493, p-value = 0
## Empirical Phi = 0.4335, Max null Phi = 0.1878, Min null Phi = 0.0896, p-value = 0
##
## Empirical eigenvalue #1 = 3.1037, Max null eigenvalue = 2.00862, p-value = 0
## Empirical eigenvalue #2 = 2.43, Max null eigenvalue = 1.50436, p-value = 0
## Empirical eigenvalue #3 = 1.62737, Max null eigenvalue = 1.37643, p-value = 0
## Empirical eigenvalue #4 = 0.51166, Max null eigenvalue = 1.19391, p-value = 1
## Empirical eigenvalue #5 = 0.26496, Max null eigenvalue = 1.07518, p-value = 1
## Empirical eigenvalue #6 = 0.0623, Max null eigenvalue = 0.88841, p-value = 1
## Empirical eigenvalue #7 = 0, Max null eigenvalue = 0.79225, p-value = 1
## Empirical eigenvalue #8 = 0, Max null eigenvalue = 0.70146, p-value = 1
##
## PC 1 is significant and accounts for 38.8% (95%-CI:37.2-51) of the total variation
## PC 2 is significant and accounts for 30.4% (95%-CI:22.7-34.4) of the total variation
## PC 3 is significant and accounts for 20.3% (95%-CI:15.1-22.6) of the total variation
##
## The first 3 PC axes are significant and account for 89.5% of the total variation
##
## Variables 3, 5, 6, and 7 have significant loadings on PC 1
## Variables 1, 3, and 4 have significant loadings on PC 2
## Variables 2, and 8 have significant loadings on PC 3
fviz_pca_var(asprcomp,axes = c(1, 2))
scores = scores(asprcomp)
pcs <- as.data.frame(scores) # same as scores
full.data <- cbind(means, pcs)
library(devtools)
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:vegan':
##
## procrustes
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:Matrix':
##
## expm, lu, tril, triu
## The following object is masked from 'package:purrr':
##
## cross
rawLoadings <- asprcomp$rotation[,1:3] %*% diag(asprcomp$sdev, 3, 3)
rotatedLoadings <- varimax(rawLoadings)$loadings
invLoadings <- t(pracma::pinv(rotatedLoadings))
scores2 <- scale(PCA) %*% invLoadings
rotatedLoadings
##
## Loadings:
## [,1] [,2] [,3]
## Call_Rate_Inst -0.828 -0.311
## Duration -0.256 0.843
## ICI 0.965 -0.156
## Period 0.972 -0.114
## Freq 0.976
## Freq_5 0.946 -0.248
## Freq_95 0.988 0.102
## BW 0.125 0.873
##
## [,1] [,2] [,3]
## SS loadings 2.914 2.569 1.678
## Proportion Var 0.364 0.321 0.210
## Cumulative Var 0.364 0.685 0.895
# from https://rpubs.com/esobolewska/pcr-step-by-step
pcs2 <- as.data.frame(scores2) # same as scores
# binding PC scores with the individual data (summarized for each individual)
full.data <- cbind(full.data, pcs2)
ggplot(data = full.data, aes(x = Infection, y = PC1))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC1 (Spectral Components)",
title = "Fungal Load x P1")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = V1))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC1 (Spectral Components)",
title = "Fungal Load x V1")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
hist(full.data$V1)
descdist(full.data$V1, discrete = FALSE, boot = 500)
## summary statistics
## ------
## min: -2.850035 max: 2.154455
## median: -0.08022708
## mean: -3.403416e-16
## estimated sd: 1
## estimated skewness: -0.3132949
## estimated kurtosis: 3.897399
# histogram looks normal, so use a normal regression with location as random variable
# first - regression with numerical variable load
lmodel_V1_load <- lm(V1~Infection, full.data)
summary(lmodel_V1_load)
##
## Call:
## lm(formula = V1 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8308 -0.4072 -0.0711 0.6937 2.2001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07053 0.18954 0.372 0.711
## Infection -0.05741 0.11396 -0.504 0.616
##
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared: 0.004212, Adjusted R-squared: -0.01238
## F-statistic: 0.2538 on 1 and 60 DF, p-value: 0.6163
# second - regression with categorical variable prevalence
lmodel_V1_prevalence <-lm(V1~Prevalence, full.data)
summary(lmodel_V1_prevalence)
##
## Call:
## lm(formula = V1 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.80901 -0.41666 -0.06607 0.67173 2.19548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1003 0.2372 0.423 0.674
## PrevalencePresent -0.1413 0.2815 -0.502 0.618
##
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared: 0.004181, Adjusted R-squared: -0.01242
## F-statistic: 0.2519 on 1 and 60 DF, p-value: 0.6176
ggplot(data = full.data, aes(x = Infection, y = PC2))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC2 (Temporal Components)",
title = "Fungal Load x PC2")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = full.data, aes(x = Infection, y = V2))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC2 (Temporal Components)",
title = "Fungal Load x V2")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
hist(scores2[, 2])
# first - regression with numerical variable load
lmodel_V2_load <- lm(V2~Infection, full.data)
summary(lmodel_V2_load)
##
## Call:
## lm(formula = V2 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2725 -0.5461 -0.1083 0.2548 5.6762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.05394 0.18971 0.284 0.777
## Infection -0.04391 0.11406 -0.385 0.702
##
## Residual standard error: 1.007 on 60 degrees of freedom
## Multiple R-squared: 0.002464, Adjusted R-squared: -0.01416
## F-statistic: 0.1482 on 1 and 60 DF, p-value: 0.7016
# second - regression with categorical variable prevalence
lmodel_V2_prevalence <-lm(V2~Prevalence, full.data)
summary(lmodel_V2_prevalence)
##
## Call:
## lm(formula = V2 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3327 -0.5253 -0.1624 0.2281 5.6296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1112 0.2370 -0.469 0.641
## PrevalencePresent 0.1567 0.2814 0.557 0.580
##
## Residual standard error: 1.006 on 60 degrees of freedom
## Multiple R-squared: 0.005145, Adjusted R-squared: -0.01144
## F-statistic: 0.3103 on 1 and 60 DF, p-value: 0.5796
ggplot(data = full.data, aes(x = Infection, y = PC3))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC3 (Bandwidth & Duration Components)",
title = "Fungal Load x PC3")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
# first - regression with numerical variable load
lmodel_PC3_load <- lm(PC3~Infection, full.data)
summary(lmodel_PC3_load)
##
## Call:
## lm(formula = PC3 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3466 -0.9083 0.0878 0.8033 2.4599
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2109 0.2395 0.881 0.382
## Infection -0.1717 0.1440 -1.192 0.238
##
## Residual standard error: 1.271 on 60 degrees of freedom
## Multiple R-squared: 0.02314, Adjusted R-squared: 0.006861
## F-statistic: 1.421 on 1 and 60 DF, p-value: 0.2379
Anova(lmodel_PC3_load)
## Anova Table (Type II tests)
##
## Response: PC3
## Sum Sq Df F value Pr(>F)
## Infection 2.297 1 1.4214 0.2379
## Residuals 96.973 60
lmodel_PC3_load_location <-lm(PC3~Infection*Location.x,full.data)
summary(lmodel_PC3_load_location)
##
## Call:
## lm(formula = PC3 ~ Infection * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.99457 -0.66330 -0.01938 0.97808 2.14122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.29153 0.32789 -0.889 0.3776
## Infection -0.07553 0.18754 -0.403 0.6886
## Location.xFR 0.96968 0.46212 2.098 0.0402 *
## Infection:Location.xFR -0.15405 0.27988 -0.550 0.5842
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.225 on 58 degrees of freedom
## Multiple R-squared: 0.1226, Adjusted R-squared: 0.07721
## F-statistic: 2.701 on 3 and 58 DF, p-value: 0.05383
ggplot(full.data,aes(x=Prevalence,y=PC3))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(position=position_jitter(0.1))+
theme_classic()
# second - regression with prevalence as factor
lmodel_PC3_prevalence <-lm(PC3~Prevalence,full.data)
summary(lmodel_PC3_prevalence)
##
## Call:
## lm(formula = PC3 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2528 -0.9130 0.0393 0.8982 2.5386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3701 0.2978 1.243 0.219
## PrevalencePresent -0.5215 0.3535 -1.475 0.145
##
## Residual standard error: 1.264 on 60 degrees of freedom
## Multiple R-squared: 0.03499, Adjusted R-squared: 0.01891
## F-statistic: 2.176 on 1 and 60 DF, p-value: 0.1454
ggplot(data = full.data, aes(x = Infection, y = V3))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = TRUE)+
labs(x = "(Log) Fungal Load",
y = "PC3 (Bandwidth & Duration Components)",
title = "Fungal Load x V3")+
theme(text = element_text(size = 12, family = "serif"))
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = V3, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
hist(scores2[, 3])
# first - regression with numerical variable load
lmodel_V3_load <- lm(V3~Infection, full.data)
summary(lmodel_V3_load)
##
## Call:
## lm(formula = V3 ~ Infection, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1434 -0.6762 0.0298 0.7161 2.0619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1562 0.1880 0.831 0.409
## Infection -0.1271 0.1130 -1.125 0.265
##
## Residual standard error: 0.9978 on 60 degrees of freedom
## Multiple R-squared: 0.02066, Adjusted R-squared: 0.004333
## F-statistic: 1.265 on 1 and 60 DF, p-value: 0.2651
Anova(lmodel_V3_load)
## Anova Table (Type II tests)
##
## Response: V3
## Sum Sq Df F value Pr(>F)
## Infection 1.26 1 1.2655 0.2651
## Residuals 59.74 60
# second - regression with categorical variable prevalence
lmodel_V3_prevalence <-lm(V3~Prevalence,full.data)
summary(lmodel_V3_prevalence)
##
## Call:
## lm(formula = V3 ~ Prevalence, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.07470 -0.67891 0.01612 0.63391 2.05429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2722 0.2340 1.163 0.249
## PrevalencePresent -0.3835 0.2777 -1.381 0.172
##
## Residual standard error: 0.9926 on 60 degrees of freedom
## Multiple R-squared: 0.0308, Adjusted R-squared: 0.01465
## F-statistic: 1.907 on 1 and 60 DF, p-value: 0.1724
ggplot(full.data,aes(x=Prevalence,y=V3))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(position=position_jitter(0.1))+
theme_classic()
# second - prevalence with location
lmodel_V3_prevalence_location <-lm(V3~Prevalence*Location.x,full.data)
summary(lmodel_V3_prevalence_location)
##
## Call:
## lm(formula = V3 ~ Prevalence * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89204 -0.63098 -0.07066 0.75074 1.79591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1493 0.3194 -0.467 0.6420
## PrevalencePresent -0.1980 0.3767 -0.526 0.6012
## Location.xFR 0.8428 0.4517 1.866 0.0671 .
## PrevalencePresent:Location.xFR -0.3486 0.5363 -0.650 0.5183
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9581 on 58 degrees of freedom
## Multiple R-squared: 0.1272, Adjusted R-squared: 0.08203
## F-statistic: 2.817 on 3 and 58 DF, p-value: 0.04693
ggplot(full.data,aes(x=Prevalence,y=V3))+
facet_wrap(~Location.x,strip.position="bottom")+
geom_boxplot(outlier.shape = NA)+
geom_jitter(position=position_jitter(0.1))+
theme_classic()
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
geom_point()+
theme_classic()+
theme(legend.position = "none")
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = ID))+
theme_classic()+
theme(legend.position = "none")+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = `Log Infection`))+
geom_point()+
theme_classic()+
geom_smooth(aes(group = ID),
method = "lm",
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, group = ID, color = Location.x))+
theme_classic()+
theme(legend.position = "none")+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(Tdat, aes(`Delta Time (s)`, `BW 90% (Hz)`, color = Location.x))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(means, aes(Duration, BW, color = Location.x))+
geom_point()+
theme_classic()+
geom_smooth(method = 'lm',
se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
slope.model <- lm(BW ~ Duration * Location.x,means)
Anova(slope.model)
## Anova Table (Type II tests)
##
## Response: BW
## Sum Sq Df F value Pr(>F)
## Duration 30558 1 17.0235 0.0001194 ***
## Location.x 2973 1 1.6560 0.2032537
## Duration:Location.x 1283 1 0.7148 0.4013268
## Residuals 104112 58
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(full.data,aes(x =Infection,y = Duration))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = Duration, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
lmodel_duration_load <-lm(Duration~Infection*Location.x,full.data)
summary(lmodel_duration_load)
##
## Call:
## lm(formula = Duration ~ Infection * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.040505 -0.016103 -0.001656 0.013873 0.051570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.041e-01 5.325e-03 19.553 <2e-16 ***
## Infection -7.977e-05 3.046e-03 -0.026 0.9792
## Location.xFR 1.340e-02 7.504e-03 1.785 0.0795 .
## Infection:Location.xFR -4.064e-03 4.545e-03 -0.894 0.3750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0199 on 58 degrees of freedom
## Multiple R-squared: 0.07236, Adjusted R-squared: 0.02438
## F-statistic: 1.508 on 3 and 58 DF, p-value: 0.222
ggplot(full.data,aes(x =Infection,y = BW))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(full.data,aes(x =Infection,y = BW, group = Location.x, color = Location.x))+
theme_classic()+
geom_point()+
geom_smooth(method = 'lm',
se = FALSE)+
scale_color_brewer(palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
lmodel_bw_load <-lm(BW~Infection*Location.x,full.data)
summary(lmodel_bw_load)
##
## Call:
## lm(formula = BW ~ Infection * Location.x, data = full.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -112.643 -34.408 1.241 29.110 83.728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 252.116 12.848 19.622 <2e-16 ***
## Infection -6.549 7.349 -0.891 0.376
## Location.xFR 19.379 18.108 1.070 0.289
## Infection:Location.xFR 3.237 10.967 0.295 0.769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.02 on 58 degrees of freedom
## Multiple R-squared: 0.0778, Adjusted R-squared: 0.0301
## F-statistic: 1.631 on 3 and 58 DF, p-value: 0.192
ggplot(full.data, aes(x = BW, y = Duration, color = Infection))+
geom_point()+
theme_classic()+
scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')
ggplot(full.data, aes(x = BW, y = Duration, color = V3))+
geom_point()+
theme_classic()+
scale_color_gradient(low = 'firebrick2', high = 'dodgerblue')
ggplot(full.data, aes(x = Infection, y = Duration))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
ggplot(full.data, aes(x = Infection, y = BW))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
ggplot(full.data, aes(x = V3, y = Duration))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
ggplot(full.data, aes(x = V3, y = BW))+
geom_point()+
theme_classic()+
stat_smooth(method = 'lm', formula = y~x, se = FALSE)
# lm with duration
dur.load <- lm(Duration~Infection, full.data)
dur.prev <- lm(Duration~Prevalence, full.data)
dur.load.location <- lm(Duration~Infection*Location.x, full.data)
dur.prev.location <- lm(Duration~Prevalence*Location.x, full.data)
Anova(dur.load)
## Anova Table (Type II tests)
##
## Response: Duration
## Sum Sq Df F value Pr(>F)
## Infection 0.0003758 1 0.9247 0.3401
## Residuals 0.0243858 60
Anova(dur.prev)
## Anova Table (Type II tests)
##
## Response: Duration
## Sum Sq Df F value Pr(>F)
## Prevalence 0.0003036 1 0.7449 0.3915
## Residuals 0.0244580 60
Anova(dur.load.location)
## Anova Table (Type II tests)
##
## Response: Duration
## Sum Sq Df F value Pr(>F)
## Infection 0.0002810 1 0.7095 0.4031
## Location.x 0.0010994 1 2.7760 0.1011
## Infection:Location.x 0.0003166 1 0.7993 0.3750
## Residuals 0.0229698 58
Anova(dur.prev.location)
## Anova Table (Type II tests)
##
## Response: Duration
## Sum Sq Df F value Pr(>F)
## Prevalence 0.0002794 1 0.7058 0.40431
## Location.x 0.0011700 1 2.9553 0.09093 .
## Prevalence:Location.x 0.0003261 1 0.8237 0.36787
## Residuals 0.0229619 58
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# lm with bandwidth
bw.load <- lm(BW~Infection, full.data)
bw.prev <- lm(BW~Prevalence, full.data)
bw.load.location <- lm(BW~Infection*Location.x, full.data)
bw.prev.location <- lm(BW~Prevalence*Location.x, full.data)
Anova(bw.load)
## Anova Table (Type II tests)
##
## Response: BW
## Sum Sq Df F value Pr(>F)
## Infection 2714 1 1.1441 0.2891
## Residuals 142309 60
Anova(bw.prev)
## Anova Table (Type II tests)
##
## Response: BW
## Sum Sq Df F value Pr(>F)
## Prevalence 5012 1 2.148 0.148
## Residuals 140010 60
Anova(bw.load.location)
## Anova Table (Type II tests)
##
## Response: BW
## Sum Sq Df F value Pr(>F)
## Infection 2012 1 0.8727 0.35409
## Location.x 8369 1 3.6293 0.06173 .
## Infection:Location.x 201 1 0.0871 0.76891
## Residuals 133739 58
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(bw.prev.location)
## Anova Table (Type II tests)
##
## Response: BW
## Sum Sq Df F value Pr(>F)
## Prevalence 4740 1 2.0952 0.15314
## Location.x 8797 1 3.8889 0.05338 .
## Prevalence:Location.x 4 1 0.0018 0.96671
## Residuals 131209 58
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`
# summarize cvs of these parameters by ID
cvs <- Tdat %>%
group_by(ID, Location.x, `Log Infection`, Prevalence) %>%
summarize(Temp = cv(`Body Temp`),
Call_Rate_Inst = cv(Call_Rate_Inst),
Duration = cv(`Delta Time (s)`),
ICI = cv(ICI),
Period = cv(Call_Period),
Freq = cv(`Peak Freq (Hz)`),
Freq_5 = cv(`Freq 5% (Hz)`),
Freq_95 = cv(`Freq 95% (Hz)`),
BW = cv(`BW 90% (Hz)`))
## `summarise()` has grouped output by 'ID', 'Location.x', 'Log Infection'. You
## can override using the `.groups` argument.
cvs
## # A tibble: 62 × 13
## # Groups: ID, Location.x, Log Infection [62]
## ID Location.x `Log Infection` Prevalence Temp Call_Rate_Inst Duration
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 PC-002-24 FR 0 Absent 0 0.103 0.0572
## 2 PC-003-24 FR 0 Absent 0 0.161 0.0816
## 3 PC-004-24 CV 1.01 Present 0 0.138 0.0710
## 4 PC-005-24 CV 1.25 Present 0 0.169 0.0552
## 5 PC-006-24 CV 2.11 Present 0 0.177 0.0688
## 6 PC-007-24 CV 0 Absent 0 0.153 0.0835
## 7 PC-009-24 FR 0 Absent 0 0.102 0.0721
## 8 PC-011-24 CV 0 Absent 0 0.162 0.0323
## 9 PC-012-24 FR 1.99 Present 0 0.0817 0.0773
## 10 PC-013-24 FR 0.788 Present 0 0.101 0.0442
## # ℹ 52 more rows
## # ℹ 6 more variables: ICI <dbl>, Period <dbl>, Freq <dbl>, Freq_5 <dbl>,
## # Freq_95 <dbl>, BW <dbl>
# print cvs as a separate csv file
write.csv(cvs,"peeper_infection_by_ID_cvs.csv",row.names=FALSE)
Summary: no difference in CVs between populations or infected/uninfected individuals.
FRCVs <- cvs %>% filter(Location.x == 'FR')
CVCVs <- cvs %>% filter(Location.x == 'CV')
PresentCVs <- cvs %>% filter(Prevalence == 'Present')
AbsentCVs <- cvs %>% filter(Prevalence == 'Absent')
ggplot(cvs, aes(x = Location.x, y = Freq))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Freq, CVCVs$Freq)
##
## Welch Two Sample t-test
##
## data: FRCVs$Freq and CVCVs$Freq
## t = 1.0061, df = 56.337, p-value = 0.3187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001864403 0.005627529
## sample estimates:
## mean of x mean of y
## 0.01257389 0.01069232
ggplot(cvs, aes(x = Prevalence, y = Freq))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Freq, AbsentCVs$Freq)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Freq and AbsentCVs$Freq
## t = -0.8019, df = 30.002, p-value = 0.4289
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005988508 0.002611655
## sample estimates:
## mean of x mean of y
## 0.01111257 0.01280099
ggplot(cvs, aes(x = Location.x, y = Freq_5))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Freq_5, CVCVs$Freq_5)
##
## Welch Two Sample t-test
##
## data: FRCVs$Freq_5 and CVCVs$Freq_5
## t = 0.013126, df = 59.204, p-value = 0.9896
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.003827815 0.003878369
## sample estimates:
## mean of x mean of y
## 0.007648133 0.007622855
ggplot(cvs, aes(x = Prevalence, y = Freq_5))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Freq_5, AbsentCVs$Freq_5)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Freq_5 and AbsentCVs$Freq_5
## t = -0.42145, df = 35.976, p-value = 0.6759
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004929489 0.003233257
## sample estimates:
## mean of x mean of y
## 0.007388859 0.008236975
ggplot(cvs, aes(x = Location.x, y = Freq_95))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Freq_95, CVCVs$Freq_95)
##
## Welch Two Sample t-test
##
## data: FRCVs$Freq_95 and CVCVs$Freq_95
## t = -0.96275, df = 59.456, p-value = 0.3396
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004726666 0.001655493
## sample estimates:
## mean of x mean of y
## 0.007014720 0.008550306
ggplot(cvs, aes(x = Prevalence, y = Freq_95))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Freq_95, AbsentCVs$Freq_95)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Freq_95 and AbsentCVs$Freq_95
## t = -1.3481, df = 43.674, p-value = 0.1846
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005164285 0.001025097
## sample estimates:
## mean of x mean of y
## 0.007206430 0.009276024
ggplot(cvs, aes(x = Location.x, y = Call_Rate_Inst))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Call_Rate_Inst, CVCVs$Call_Rate_Inst)
##
## Welch Two Sample t-test
##
## data: FRCVs$Call_Rate_Inst and CVCVs$Call_Rate_Inst
## t = -0.9424, df = 58.369, p-value = 0.3499
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08727154 0.03139609
## sample estimates:
## mean of x mean of y
## 0.1977314 0.2256691
ggplot(cvs, aes(x = Prevalence, y = Call_Rate_Inst))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Call_Rate_Inst, AbsentCVs$Call_Rate_Inst)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Call_Rate_Inst and AbsentCVs$Call_Rate_Inst
## t = 0.95212, df = 44.067, p-value = 0.3462
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0304617 0.0850225
## sample estimates:
## mean of x mean of y
## 0.2200710 0.1927906
ggplot(cvs, aes(x = Location.x, y = ICI))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$ICI, CVCVs$ICI)
##
## Welch Two Sample t-test
##
## data: FRCVs$ICI and CVCVs$ICI
## t = 1.1566, df = 58.855, p-value = 0.2521
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0744415 0.2783666
## sample estimates:
## mean of x mean of y
## 0.4564452 0.3544827
ggplot(cvs, aes(x = Prevalence, y = ICI))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$ICI, AbsentCVs$ICI)
##
## Welch Two Sample t-test
##
## data: PresentCVs$ICI and AbsentCVs$ICI
## t = 0.77809, df = 35.64, p-value = 0.4417
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1156235 0.2594857
## sample estimates:
## mean of x mean of y
## 0.4247026 0.3527715
ggplot(cvs, aes(x = Location.x, y = Period))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Period, CVCVs$Period)
##
## Welch Two Sample t-test
##
## data: FRCVs$Period and CVCVs$Period
## t = 1.1045, df = 59.109, p-value = 0.2739
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07392361 0.25607257
## sample estimates:
## mean of x mean of y
## 0.4079865 0.3169120
ggplot(cvs, aes(x = Prevalence, y = Period))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Period, AbsentCVs$Period)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Period and AbsentCVs$Period
## t = 0.81432, df = 36.625, p-value = 0.4207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1035769 0.2426950
## sample estimates:
## mean of x mean of y
## 0.3811749 0.3116158
ggplot(cvs, aes(x = Location.x, y = Duration))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$Duration, CVCVs$Duration)
##
## Welch Two Sample t-test
##
## data: FRCVs$Duration and CVCVs$Duration
## t = 1.8339, df = 45.181, p-value = 0.07326
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001505714 0.032188842
## sample estimates:
## mean of x mean of y
## 0.07392347 0.05858191
ggplot(cvs, aes(x = Prevalence, y = Duration))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$Duration, AbsentCVs$Duration)
##
## Welch Two Sample t-test
##
## data: PresentCVs$Duration and AbsentCVs$Duration
## t = 0.76379, df = 47.836, p-value = 0.4487
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.009741488 0.021674595
## sample estimates:
## mean of x mean of y
## 0.06773747 0.06177092
ggplot(cvs, aes(x = Location.x, y = BW))+
geom_boxplot()+
theme_classic()
t.test(FRCVs$BW, CVCVs$BW)
##
## Welch Two Sample t-test
##
## data: FRCVs$BW and CVCVs$BW
## t = 0.071225, df = 59.62, p-value = 0.9435
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03088406 0.03316433
## sample estimates:
## mean of x mean of y
## 0.1117418 0.1106017
ggplot(cvs, aes(x = Prevalence, y = BW))+
geom_boxplot()+
theme_classic()
t.test(PresentCVs$BW, AbsentCVs$BW)
##
## Welch Two Sample t-test
##
## data: PresentCVs$BW and AbsentCVs$BW
## t = -1.4231, df = 46.092, p-value = 0.1614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.051086208 0.008768261
## sample estimates:
## mean of x mean of y
## 0.1050104 0.1261694
# use Excel to calculate the means and the sds of the CVs, then format it in a way that makes R happy
cv.table <- read_xlsx("~/Desktop/UTK/Tanner Lab/Analysis/Call Analysis/Peeper Analysis/Peeper_CVs_Means_SDs.xlsx")
ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
geom_bar(stat="identity")+
theme_classic()+
theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
legend.position = "none")+
geom_errorbar( aes(ymin = Min, ymax = Max),
data = cv.table, width = 0.2, alpha = 0.3)+
geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
labs(x = "Variable",
y = "CV",
title = "Variables ordered by CV, with Min and Max CVs")+
scale_color_brewer(palette = "Set2")
ggplot(cv.table, aes(x=reorder(Variable, Means), y=Means,fill=Variable)) +
geom_bar(stat="identity")+
theme_classic()+
theme(axis.text.x = element_text(angle = 50, vjust = 0.5, hjust=0.5),
legend.position = "none")+
geom_errorbar( aes(ymin = Means - SDs, ymax = Means + SDs),
data = cv.table, width = 0.2, alpha = 0.3)+
geom_text(aes(label=round(Means,4)), vjust=-0.3, size=3.5, alpha=1)+
labs(x = "Variable",
y = "CV",
title = "Variables ordered by CV, with SDs")+
scale_color_brewer(palette = "Set2")