Packages used

library(mdatools)
library(vegan)
library(ggplot2)
library(plotly)
library(gapminder)
library(readr)
library(knitr)
library(rsconnect)

Import and organize data

phloem            <- read_csv("phloem_avg_no_outliers.csv")
phloem$Familytype <- as.factor(phloem$Familytype)
phloem            <- subset(phloem, Familytype=="Full")

Second-derivative transformation

expl <- as.numeric(phloem$BV)
spec <- phloem[, 6:262]

derv <- prep.savgol(as.matrix(spec), width=15, porder=2, dorder=2)
fam  <- as.factor(phloem$Family)

NMDS + output

dist      <- vegdist(derv, method = 'euclidian')
peak.NMDS <- metaMDS(dist, distance = "euclidian", k = 2, try = 1000)
## Run 0 stress 0.02448102 
## Run 1 stress 0.02448099 
## ... New best solution
## ... Procrustes: rmse 4.128506e-05  max resid 0.0003296414 
## ... Similar to previous best
## Run 2 stress 0.0790682 
## Run 3 stress 0.02448098 
## ... New best solution
## ... Procrustes: rmse 1.768375e-05  max resid 0.0001556558 
## ... Similar to previous best
## Run 4 stress 0.024481 
## ... Procrustes: rmse 4.181735e-05  max resid 0.0003132847 
## ... Similar to previous best
## Run 5 stress 0.02448097 
## ... New best solution
## ... Procrustes: rmse 6.07727e-06  max resid 3.738975e-05 
## ... Similar to previous best
## Run 6 stress 0.06186777 
## Run 7 stress 0.06186777 
## Run 8 stress 0.08584219 
## Run 9 stress 0.02448098 
## ... Procrustes: rmse 3.50282e-06  max resid 2.571354e-05 
## ... Similar to previous best
## Run 10 stress 0.06186769 
## Run 11 stress 0.02448103 
## ... Procrustes: rmse 2.337688e-05  max resid 0.0001974047 
## ... Similar to previous best
## Run 12 stress 0.0790683 
## Run 13 stress 0.09973171 
## Run 14 stress 0.06650721 
## Run 15 stress 0.024481 
## ... Procrustes: rmse 1.636632e-05  max resid 0.0001261525 
## ... Similar to previous best
## Run 16 stress 0.09529035 
## Run 17 stress 0.02448101 
## ... Procrustes: rmse 1.932119e-05  max resid 0.000144132 
## ... Similar to previous best
## Run 18 stress 0.02448101 
## ... Procrustes: rmse 1.363344e-05  max resid 9.838641e-05 
## ... Similar to previous best
## Run 19 stress 0.02448101 
## ... Procrustes: rmse 1.818642e-05  max resid 0.0001210534 
## ... Similar to previous best
## Run 20 stress 0.02448097 
## ... New best solution
## ... Procrustes: rmse 7.30665e-06  max resid 6.07444e-05 
## ... Similar to previous best
## *** Best solution repeated 1 times
stressplot(peak.NMDS)

peak.NMDS$stress
## [1] 0.02448097
adonis2(dist ~ fam,  permutations = 9999)
## Significance indicates that the families are significantly grouped by distance

scores <- as.data.frame(scores(peak.NMDS))
expl   <- cbind.data.frame(expl, fam, scores)

ibv    <- as.data.frame(unique(expl$expl))

centroids <- aggregate(cbind(NMDS1, NMDS2) ~ fam, expl, mean)
f         <- function(z)sd(z)/sqrt(length(z))
se        <- aggregate(cbind(se.x=NMDS1, se.y=NMDS2) ~ fam, expl, f)
centroids <- merge(centroids, se, by = "fam")
centroids <- cbind(centroids, ibv)
colnames(centroids) <- c("family", "NMDS1", "NMDS2", "se.1", "se.2", "IBV")
p <- ggplot(centroids, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = IBV)) + theme_bw() +
  geom_point(data = centroids, aes(col=IBV)) +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=centroids, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
  geom_errorbarh(data=centroids, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) +
  geom_point(data = expl, aes(NMDS1, NMDS2, col = expl, label = fam, col = expl), alpha=.5) +
  labs(title = "Full sibs, phloem") + scale_color_viridis_c()
## Warning in geom_point(data = expl, aes(NMDS1, NMDS2, col = expl, label = fam, :
## Ignoring unknown aesthetics: label and col
ggplotly(p) ## interactive plot
## Warning: Duplicated aesthetics after name standardisation: colour

Extract average lesion length per family

Add data to NMDS by family

lesion <- read_csv("M053_Gclav_lesion_data_tidy_YrAvg.csv")


avg.lesion <- aggregate(lesion_mm_avgforyear ~ family, lesion, mean)
f         <- function(z)sd(z)/sqrt(length(z))
se        <- aggregate(lesion_mm_avgforyear ~ family, lesion, f)

lesion.df <- merge(avg.lesion, se, by = 'family')

colnames(lesion.df) <- c("family", "mean", "se")


merged.df <- merge(centroids, lesion.df, by = 'family')

colnames(merged.df) <- c("family", "NMDS1", "NMDS2", "se.1", "se.2", "IBV", 
                         "lesion.avg", 'se.avg')

Is there a relation between spectra, family, and lesion? Note: cannot run a PERMANOVA on lesion lengths since they are averaged by family

lesion.p <- ggplot(merged.df, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = lesion.avg)) + theme_bw() +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=merged.df, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = lesion.avg), alpha = 0.8) +
  geom_errorbarh(data=merged.df, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = lesion.avg), alpha = 0.8) + scale_color_viridis_c()

ggplotly(lesion.p)

To better visualize the relationship between breeding value, lesion, and spectra (i.e. NDMS distances),

colnames(merged.df)
## [1] "family"     "NMDS1"      "NMDS2"      "se.1"       "se.2"      
## [6] "IBV"        "lesion.avg" "se.avg"
merged.df$phenotype[merged.df$IBV < 0.5] <- 'Susceptible'
merged.df$phenotype[merged.df$IBV > 0.5] <- 'Resistant'


pheno.p <- ggplot(merged.df, aes(NMDS1, NMDS2, label = family)) + 
  geom_point(aes(col = IBV, size = lesion.avg)) + theme_bw() +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=merged.df, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
  geom_errorbarh(data=merged.df, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) + scale_color_viridis_b() +
  labs(title = "Phloem, Full sibs") + 
  geom_point(data = expl, aes(NMDS1, NMDS2, col = expl, label = fam), alpha=.5)
## Warning in geom_point(data = expl, aes(NMDS1, NMDS2, col = expl, label = fam),
## : Ignoring unknown aesthetics: label
ggplotly(pheno.p)