Packages used

library(mdatools)
library(vegan)
library(ggplot2)
library(plotly)
library(gapminder)
library(readr)
library(knitr)
library(rsconnect)

Import and organize data

needles            <- read_csv("needles_avg_no_outliers.csv")
needles$Familytype <- as.factor(needles$Familytype)
needles            <- subset(needles, Familytype=="Half")

Second-derivative transformation

expl <- as.numeric(needles$IBV)
spec <- needles[, 6:262]

derv <- prep.savgol(as.matrix(spec), width=15, porder=2, dorder=2)
fam  <- as.factor(needles$Family)

NMDS + output

dist      <- vegdist(derv, method = 'euclidian')
peak.NMDS <- metaMDS(dist, distance = "euclidian", k = 2, try = 1000)
## Run 0 stress 0.03568054 
## Run 1 stress 0.08014946 
## Run 2 stress 0.06910161 
## Run 3 stress 0.06502041 
## Run 4 stress 0.07872181 
## Run 5 stress 0.08290376 
## Run 6 stress 0.07121081 
## Run 7 stress 0.03568078 
## ... Procrustes: rmse 0.0001902405  max resid 0.00192617 
## ... Similar to previous best
## Run 8 stress 0.0949742 
## Run 9 stress 0.03568061 
## ... Procrustes: rmse 0.0001574931  max resid 0.001578905 
## ... Similar to previous best
## Run 10 stress 0.06499671 
## Run 11 stress 0.4149194 
## Run 12 stress 0.03573324 
## ... Procrustes: rmse 0.0006927939  max resid 0.006103874 
## ... Similar to previous best
## Run 13 stress 0.03573332 
## ... Procrustes: rmse 0.0007022604  max resid 0.006070296 
## ... Similar to previous best
## Run 14 stress 0.09728128 
## Run 15 stress 0.06204384 
## Run 16 stress 0.07131133 
## Run 17 stress 0.09651171 
## Run 18 stress 0.03568058 
## ... Procrustes: rmse 3.209044e-05  max resid 0.0002669233 
## ... Similar to previous best
## Run 19 stress 0.0356806 
## ... Procrustes: rmse 0.000158667  max resid 0.001669116 
## ... Similar to previous best
## Run 20 stress 0.0356805 
## ... New best solution
## ... Procrustes: rmse 0.0001162543  max resid 0.001235775 
## ... Similar to previous best
## *** Best solution repeated 1 times
stressplot(peak.NMDS)

peak.NMDS$stress
## [1] 0.0356805
adonis2(dist ~ fam,  permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = dist ~ fam, permutations = 9999)
##           Df SumOfSqs      R2      F Pr(>F)   
## fam       39  0.60557 0.37924 1.8014 0.0014 **
## Residual 115  0.99124 0.62076                 
## Total    154  1.59681 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Significance indicates that the families are significantly grouped by distance

scores <- as.data.frame(scores(peak.NMDS))
all.scores   <- cbind.data.frame(expl, fam, scores)
colnames(all.scores) <- c("IBV", 'family', 'NMDS1', 'NMDS2')



centroids <- aggregate(cbind(NMDS1, NMDS2) ~ family*IBV, all.scores, mean)
f         <- function(z)sd(z)/sqrt(length(z))
se        <- aggregate(cbind(se.x=NMDS1, se.y=NMDS2) ~ family*IBV, all.scores, f)
centroids <- merge(centroids, se, by = "family")
centroids <- centroids[, c(1:4, 6,7)]
colnames(centroids) <- c("family","IBV",  "NMDS1", "NMDS2", "se.1", "se.2")
p <- ggplot(centroids, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = IBV)) + theme_bw() +
  geom_point(data = centroids, aes(col=IBV)) +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=centroids, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
  geom_errorbarh(data=centroids, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) +
  geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label = family), alpha=.5) +
  labs(title = "Half sibs, needles") + scale_color_viridis_c() 
## Warning in geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label =
## family), : Ignoring unknown aesthetics: label
ggplotly(p) ## interactive plot

Since there’s a lot of overlap, I made the same figured zoomed in, which excludes some of the points but gives you a better look at the center.

p.zoom <- ggplot(centroids, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = IBV)) + theme_bw() +
  geom_point(data = centroids, aes(col=IBV)) +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=centroids, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
  geom_errorbarh(data=centroids, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) +
  geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label = family), alpha=.5) +
  labs(title = "Half sibs, needles") + scale_color_viridis_c()  + ylim(-.08, .07) + xlim(-.1, .099)
## Warning in geom_point(data = all.scores, aes(NMDS1, NMDS2, col = IBV, label =
## family), : Ignoring unknown aesthetics: label
ggplotly(p.zoom)

Also plotting just the average centroid with standard error for clarity.

p.cent <- ggplot(centroids, aes(NMDS1, NMDS2, label = family)) + geom_point(aes(col = IBV)) + theme_bw() +
  geom_point(data = centroids, aes(col=IBV)) +
  geom_text(nudge_x = 0.005, nudge_y = 0.005) +
  geom_errorbar(data=centroids, aes(ymin=NMDS2-se.2, ymax=NMDS2+se.2, col = IBV)) +
  geom_errorbarh(data=centroids, aes(xmin=NMDS1-se.1, xmax=NMDS1+se.1, col = IBV)) +
  labs(title = "Half sibs, needles") + scale_color_viridis_c() 

ggplotly(p.cent)