Packages used
library(mdatools)
library(vegan)
library(ggplot2)
library(plotly)
library(gapminder)
library(readr)
library(knitr)
library(rsconnect)
Import and organize data
needles <- read_csv("needles_avg_no_outliers.csv")
needles$Familytype <- as.factor(needles$Familytype)
needles <- subset(needles, Familytype=="Full")
Second-derivative transformation
expl <- as.numeric(needles$IBV)
spec <- needles[, 6:262]
derv <- prep.savgol(as.matrix(spec), width=15, porder=2, dorder=2)
fam <- as.factor(needles$Family)
NMDS + output
dist <- vegdist(derv, method = 'euclidian')
peak.NMDS <- metaMDS(dist, distance = "euclidian", k = 2, try = 1000)
## Run 0 stress 0.01586238
## Run 1 stress 0.03636626
## Run 2 stress 0.0158624
## ... Procrustes: rmse 1.86361e-05 max resid 8.175135e-05
## ... Similar to previous best
## Run 3 stress 0.06127177
## Run 4 stress 0.01586243
## ... Procrustes: rmse 2.605384e-05 max resid 0.0001291996
## ... Similar to previous best
## Run 5 stress 0.01586245
## ... Procrustes: rmse 3.13131e-05 max resid 0.0001597312
## ... Similar to previous best
## Run 6 stress 0.03636619
## Run 7 stress 0.05462379
## Run 8 stress 0.0158624
## ... Procrustes: rmse 1.827351e-05 max resid 0.0001500014
## ... Similar to previous best
## Run 9 stress 0.01586241
## ... Procrustes: rmse 2.313283e-05 max resid 0.0001105008
## ... Similar to previous best
## Run 10 stress 0.01586244
## ... Procrustes: rmse 2.945293e-05 max resid 0.0001469859
## ... Similar to previous best
## Run 11 stress 0.01586241
## ... Procrustes: rmse 2.128957e-05 max resid 0.0001042005
## ... Similar to previous best
## Run 12 stress 0.05042741
## Run 13 stress 0.0158624
## ... Procrustes: rmse 1.969214e-05 max resid 9.232542e-05
## ... Similar to previous best
## Run 14 stress 0.01586244
## ... Procrustes: rmse 3.085256e-05 max resid 0.0002145364
## ... Similar to previous best
## Run 15 stress 0.411361
## Run 16 stress 0.05302752
## Run 17 stress 0.0158624
## ... Procrustes: rmse 1.744763e-05 max resid 7.449848e-05
## ... Similar to previous best
## Run 18 stress 0.03636618
## Run 19 stress 0.01586242
## ... Procrustes: rmse 2.443069e-05 max resid 0.0001204856
## ... Similar to previous best
## Run 20 stress 0.05746475
## *** Best solution repeated 11 times
stressplot(peak.NMDS)
peak.NMDS$stress
## [1] 0.01586238
adonis2(dist ~ fam, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = dist ~ fam, permutations = 9999)
## Df SumOfSqs R2 F Pr(>F)
## fam 15 0.48675 0.24832 1.6738 0.0553 .
## Residual 76 1.47340 0.75168
## Total 91 1.96015 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Significance indicates that the families are significantly grouped by distance
scores <- as.data.frame(scores(peak.NMDS))
all.scores <- cbind.data.frame(expl, fam, scores)
colnames(all.scores) <- c("IBV", 'family', 'NMDS1', 'NMDS2')
centroids <- aggregate(cbind(NMDS1, NMDS2) ~ family*IBV, all.scores, mean)
f <- function(z)sd(z)/sqrt(length(z))
se <- aggregate(cbind(se.x=NMDS1, se.y=NMDS2) ~ family*IBV, all.scores, f)
centroids <- merge(centroids, se, by = "family")
centroids <- centroids[, c(1:4, 6,7)]
colnames(centroids) <- c("family","IBV", "NMDS1", "NMDS2", "se.1", "se.2")
p <- ggplot(centroids, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = IBV)) + theme_bw() +
geom_point(data = centroids, aes(col=IBV)) +
geom_text(nudge_x = 0.005, nudge_y = 0.005) +
geom_errorbar(data=centroids, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
geom_errorbarh(data=centroids, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) +
geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label = family), alpha=.5) +
labs(title = "Full sibs, needles") + scale_color_viridis_c()
## Warning in geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label =
## family), : Ignoring unknown aesthetics: label
ggplotly(p) ## interactive plot
Extract average lesion length per family
Add data to NMDS by family. Because these are just averages, we can’t run a PERMANOVA, so this is mostly for visualiation purposes.
lesion <- read_csv("M053_Gclav_lesion_data_tidy_YrAvg.csv")
avg.lesion <- aggregate(lesion_mm_avgforyear ~ family, lesion, mean)
f <- function(z)sd(z)/sqrt(length(z))
se <- aggregate(lesion_mm_avgforyear ~ family, lesion, f)
lesion.df <- merge(avg.lesion, se, by = 'family')
colnames(lesion.df) <- c("family", "mean", "se")
merged.df <- merge(centroids, lesion.df, by = 'family')
colnames(merged.df) <- c("family","IBV", "NMDS1", "NMDS2", "se.1", "se.2",
"lesion.avg", 'se.avg')
Is there a relation between spectra, family, and lesion? Note: cannot run a PERMANOVA on lesion lengths since they are averaged by family
lesion.p <- ggplot(merged.df, aes(NMDS1, NMDS2, label = family)) +
geom_point(aes(col = lesion.avg)) + theme_bw() +
geom_text(nudge_x = 0.005, nudge_y = 0.005) +
geom_errorbar(data=merged.df, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = lesion.avg), alpha = 0.8) +
geom_errorbarh(data=merged.df, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = lesion.avg), alpha = 0.8) + scale_color_viridis_c()
ggplotly(lesion.p)
To better visualize the relationship between breeding value, lesion, and spectra (i.e. NDMS distances),
merged.df$phenotype[merged.df$IBV < 0.5] <- 'Susceptible'
merged.df$phenotype[merged.df$IBV > 0.5] <- 'Resistant'
pheno.p <- ggplot(merged.df, aes(NMDS1, NMDS2, label = family)) +
geom_point(aes(col = IBV, size = lesion.avg)) + theme_bw() +
geom_text(nudge_x = 0.005, nudge_y = 0.005) +
geom_errorbar(data=merged.df, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
geom_errorbarh(data=merged.df, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) + scale_color_viridis_b() +
labs(title = "Phloem, Full sibs", subtitle = "size indicates average lesion size") +
geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label = family), alpha=.5)
## Warning in geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label =
## family), : Ignoring unknown aesthetics: label
plot(pheno.p)
ggplotly(pheno.p)
This is just for visualization purposes to see if there are any patterns we can parse out –
I combined all the time data, did some data analysis to see if there were any associations between proportion of time spent and family/breeding value/susceptibility.
Then I paired the average time proportions for each family in the NMDS. We bypass a lot of assumptions here so I wouldn’t use this as an official analysis (especially since I’m not sure if you wanted to combine the two experiments M055 and M059) but just as a visualization or a guiding tool.
m055 <- read_csv("M055_pine_olfactometer_data_FINAL.csv")
m059 <- read_csv("M059_pine_olfactometer_data_FINAL.csv")
m055_sub <- m055[, c(4, 7, 10, 11)]
m059_sub <- m059[, c(7, 12, 15, 16)]
time_df <- rbind(m055_sub, m059_sub)
colnames(time_df) <- c("time", "family", "phenotype", "IBV")
By family
avg.time <- aggregate(time ~ family*IBV, time_df, mean)
f <- function(z)sd(z)/sqrt(length(z))
se <- aggregate(time ~ family, time_df, f)
time.df <- merge(avg.time, se, by = 'family')
colnames(time.df) <- c("family", "IBV", "mean", "se")
time.df$family <- as.factor(time.df$family)
time.df$phenotype[time.df$IBV < 0.5] <- 'Susceptible'
time.df$phenotype[time.df$IBV > 0.5] <- 'Resistant'
ggplot(time.df, aes(x= family, y = mean )) + geom_bar(stat='identity') +
geom_errorbar(aes(ymin=mean - se, ymax = mean + se), size = .3, width = .3)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
By phenotype
avg.time.p <- aggregate(time ~ phenotype, time_df, mean)
f <- function(z)sd(z)/sqrt(length(z))
se <- aggregate(time ~ phenotype, time_df, f)
time.df.p <- merge(avg.time.p, se, by = 'phenotype')
colnames(time.df.p) <- c("phenotype", "mean", "se")
ggplot(time.df.p, aes(x= phenotype, y = mean )) + geom_bar(stat='identity') +
geom_errorbar(aes(ymin=mean - se, ymax = mean + se), size = .3, width = .3)
Add average beetle time for average family centroids
time.w.centroid <- merge(avg.time[,c(1,3)], centroids, by ='family')
time.plot <- ggplot(time.w.centroid, aes(NMDS1, NMDS2, label = family)) +
geom_point(aes(col = IBV, size = time), show.legend = T) + theme_bw() +
geom_text(nudge_x = 0.005, nudge_y = 0.005) +
geom_errorbar(data=time.w.centroid, aes(ymin=NMDS2-se.2,
ymax=NMDS2+se.2, col = IBV)) +
geom_errorbarh(data=time.w.centroid, aes(xmin=NMDS1-se.1,
xmax=NMDS1+se.1, col = IBV)) + scale_color_viridis_b() +
geom_point(data = all.scores, aes(NMDS1, NMDS2,col = IBV, label = family))
## Warning in geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label =
## family)): Ignoring unknown aesthetics: label
plot(time.plot)
ggplotly(time.plot)