Robin,

Regarding the email I sent you on 11/2/17: here is an outline of how to color labels, arrange labels, and set the label colors of dendrograms using the package dendextend. I thought you might be interested in how I manipulated the dendrogram labels, for future reference.

First install the package with install.packages('dendextend'). Then read the data file ‘af.bug.test.csv’ and run the code that you previously wrote and sent to me, which is the chunk below:

#setwd("C:/Users/Jesse/Desktop/Grad/Thesis/Data/Analysis/Datasets/Multivariate")

# Load the raw multivariate dataset (comma-separated; first row holds the
# column names).
alldata <- read.table(
  "C:/Users/Jesse/Desktop/Grad/Thesis/Data/Analysis/Datasets/Multivariate/af.bug.test.csv",
  header = TRUE, sep = ","
)
library("dendextend")
###################################################
# Create prcomp, omitting categorical data and column 40 (all zero)
# NOTE(review): the call below passes columns 13:47, which still spans
# column 40 -- confirm the all-zero column was already removed from the file.
###################################################
# Centered, unit-variance PCA on columns 13 through 47 of the raw data.
# `scale.` is spelled out in full rather than relying on partial matching.
alldataPCA <- prcomp(alldata[, 13:47], scale. = TRUE, center = TRUE)
# summary(alldataPCA) # ---> If you wish to see the distribution of the PCA variances

# Use a single-panel plotting layout; the previous settings are kept in `op`.
op <- par(mfrow = c(1, 1))
# Plot the variances of the principal components.
plot(alldataPCA)

###################################################
# Write PCA results to new files, including categorical data
###################################################

# Combine the categorical columns of `alldata` with the PC scores (rounded to
# 3 decimals). The arguments are deliberately left as `alldata$...`
# expressions: data.frame() derives the output column names from them, and
# the written file should keep the same header as before.
PCA.scores <- data.frame(
  alldata$sample, alldata$wp, alldata$depth,
  alldata$macrophyte, alldata$codom, alldata$mixed, alldata$position,
  alldata$complexity, alldata$abundance, alldata$deep.rank,
  alldata$cwd, alldata$location,
  round(alldataPCA$x, 3)
)

# Write the scores as an unquoted CSV with a header row and no row names.
write.table(PCA.scores, "alldataPCA.csv",
            quote = FALSE, row.names = FALSE, col.names = TRUE, sep = ",")

# Variable loadings (rotation matrix), rounded to 3 decimals.
PCA.variables <- data.frame(round(alldataPCA$rotation, 3))
# col.names = NA together with row.names = TRUE writes a blank header cell
# above the row-name column, so the header lines up with the data columns.
write.table(PCA.variables, "PCA_variables.csv",
            quote = FALSE, row.names = TRUE, col.names = NA, sep = ",")

###################################################
# Hcluster vs. macrophyte - two examples
###################################################
# Read back the PCA score table written earlier to "alldataPCA.csv".
newdata <- read.table("alldataPCA.csv", header = TRUE, sep = ",")

## plot showing all 38 scores labeled by macrophyte
# NOTE(review): the leaves are labeled with alldata$complexity, not
# macrophyte, and columns 13:47 span 35 score columns -- confirm which
# labeling and title are intended.
# Euclidean distances between samples over score columns 13 through 47.
distances <- dist(newdata[, 13:47], method = "euclidean")
# Agglomerative clustering using the "ward.D" method.
eward <- hclust(distances, method = "ward.D")
plot(eward, labels = alldata$complexity, hang = -0.1, cex = 0.65,
     xlab = " ", sub = " ",
     main = "PC1-38", ylab = "Euclidean Distance")

Here is an example of how I created the dendrograms using the scores for principal components (PC) 1-3 that you coded above.

# Cluster the samples on score columns 13:15 of `newdata` (the first three
# PC score columns, which follow the 12 categorical columns).
distances <- dist(newdata[, 13:15], method = "euclidean")
eward <- hclust(distances, method = "ward.D")
dend <- as.dendrogram(eward)
# plot(dend, main = "Original dend") # ---> plot the original dendrogram if you wish.

# Let's add some color: one numeric color code per sample, by complexity.
# Since R 4.0, read.table() returns text columns as character rather than
# factor, and as.numeric() on text yields NA (no colors). Convert through a
# factor so each complexity level maps to a code; a column that is already
# numeric is used as-is.
complexity <- alldata$complexity
if (is.numeric(complexity)) {
  colors_to_use <- as.numeric(complexity)
} else {
  colors_to_use <- as.numeric(as.factor(complexity))
}

# But sort them based on their order in dend:
leaf_order <- order.dendrogram(dend)
colors_to_use <- colors_to_use[leaf_order]

# Now we can use them
labels_colors(dend) <- colors_to_use
# Now each level of complexity has a color
# labels_colors(dend) # ---> This will list the row numbers that appear on the dendrogram from left to right.


# Now we need to change the labels to the level names of complexity.
dend.list <- as.character(alldata$complexity)

# Reorder the labels from the order in which they appear in the original data
# to the order in which they appear on the dendrogram we just made.
labels(dend) <- dend.list[leaf_order]

# Here is the final plot!
plot(dend, main = "Ordered color for every Complexity")

# Close the current graphics device.
dev.off()
## null device 
##           1