alldata <- read.csv("C:/ShannonCall/Thesis/Data/CSV_Data/CSVData/simpveg.csv")
summary(alldata)
## site.location site. Site Transect Age Location
## a.1 : 6 a:12 Beaver :12 T1B1A : 1 Inter:12 Above:24
## a.2 : 6 b:12 Fishpark :12 T1B2A : 1 New :12 Below:24
## b.1 : 6 c:12 Harper :12 T1B3A : 1 Old :12
## b.2 : 6 d:12 Stillwater:12 T1B4B : 1 Pre :12
## c.1 : 6 T1B5B : 1
## c.2 : 6 T1B6B : 1
## (Other):12 (Other):42
## shannon div BareGround FatHen
## Min. :0.5654 high:16 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.2882 low :16 1st Qu.: 0.75 1st Qu.: 0.000
## Median :1.5030 med :16 Median : 3.00 Median : 2.000
## Mean :1.5116 Mean : 4.50 Mean : 4.729
## 3rd Qu.:1.8230 3rd Qu.: 6.25 3rd Qu.: 4.000
## Max. :2.1240 Max. :24.00 Max. :33.000
##
## ColonialBentGrass HookerWillow SitkaWillow TreFoil
## Min. : 0.000 Min. :0.0000 Min. :0.00000 Min. :0.00
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00
## Median : 1.000 Median :0.0000 Median :0.00000 Median :0.00
## Mean : 5.896 Mean :0.5417 Mean :0.08333 Mean :0.75
## 3rd Qu.: 5.750 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00
## Max. :29.000 Max. :5.0000 Max. :2.00000 Max. :8.00
##
## WesternHemlock CommonRush QuackGrass HBB
## Min. :0.00000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.00000 Median :0.0000 Median :0.000 Median :0.000
## Mean :0.02083 Mean :0.1667 Mean :0.625 Mean :0.625
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :1.00000 Max. :3.0000 Max. :4.000 Max. :6.000
##
## RoundLeafPlantain RedClover Scotchbroom Alder
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.04167 Mean :0.1875 Mean :0.2292 Mean :0.2917
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :4.0000 Max. :3.0000 Max. :7.0000
##
## CreepingButtercup VelvetGrass EquisetumHymale LWD
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. : 0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median :0.00000 Median :0.0000 Median :0.00000 Median : 0.0000
## Mean :0.02083 Mean :0.1042 Mean :0.02083 Mean : 0.5833
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.: 0.0000
## Max. :1.00000 Max. :2.0000 Max. :1.00000 Max. :10.0000
##
## Gumweed NootkaRose PacSilverweed Snowberry
## Min. : 0.000 Min. :0.0000 Min. :0.000 Min. :0.00000
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000
## Median : 1.500 Median :0.0000 Median :0.000 Median :0.00000
## Mean : 3.979 Mean :0.3125 Mean :0.125 Mean :0.04167
## 3rd Qu.: 6.250 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :30.000 Max. :8.0000 Max. :2.000 Max. :1.00000
##
## OxEyeDaisy CurlyDock BLMaple Soreil
## Min. :0.00000 Min. :0.00000 Min. : 0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median : 0.0000 Median :0.00000
## Mean :0.04167 Mean :0.02083 Mean : 0.2917 Mean :0.02083
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.:0.00000
## Max. :2.00000 Max. :1.00000 Max. :10.0000 Max. :1.00000
##
## CanadaThistle ReedCanary CommonSpikeRush EnglishPlantain
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.0625 Mean :0.2292 Mean :0.0625 Mean :0.125
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :6.0000 Max. :2.0000 Max. :3.000
##
## SwordFern HairyCatEar Tansy SelfHeal
## Min. :0.00000 Min. :0.0000 Min. :0 Min. :0
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0
## Median :0.00000 Median :0.0000 Median :0 Median :0
## Mean :0.02083 Mean :0.0625 Mean :0 Mean :0
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0
## Max. :1.00000 Max. :2.0000 Max. :0 Max. :0
##
## TrailingBB PickleWeed MeadowBarley SaltGrass
## Min. : 0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.0000 Median : 1.000 Median : 0.000 Median : 0.000
## Mean : 0.2083 Mean : 9.604 Mean : 2.458 Mean : 3.188
## 3rd Qu.: 0.0000 3rd Qu.:15.000 3rd Qu.: 0.000 3rd Qu.: 7.000
## Max. :10.0000 Max. :40.000 Max. :36.000 Max. :15.000
##
## CanadianSandSpurry DuneGrass AmericanSeaRocket Ribwort
## Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 0.000 Median : 0.000 Median :0.0000 Median :0.00000
## Mean : 1.188 Mean : 2.083 Mean :0.3542 Mean :0.02083
## 3rd Qu.: 0.000 3rd Qu.: 0.000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :27.000 Max. :27.000 Max. :7.0000 Max. :1.00000
##
## Sandwort SeaPlantain CoastalPearlWort Montia
## Min. :0.0000 Min. :0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median : 0.0000 Median :0.0000
## Mean :0.1458 Mean :0.9167 Mean : 0.7708 Mean :0.1667
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.: 0.0000 3rd Qu.:0.0000
## Max. :7.0000 Max. :8.0000 Max. :19.0000 Max. :4.0000
##
## DouglasAster SeaArrow LyngbySedge BlueJointGrass
## Min. :0.00000 Min. : 0.000 Min. : 0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:0.000
## Median :0.00000 Median : 0.000 Median : 0.0000 Median :0.000
## Mean :0.08333 Mean : 0.375 Mean : 0.3958 Mean :0.125
## 3rd Qu.:0.00000 3rd Qu.: 0.000 3rd Qu.: 0.0000 3rd Qu.:0.000
## Max. :2.00000 Max. :17.000 Max. :11.0000 Max. :4.000
##
## SpikeBentGrass JuncusGerardii BlueWildRye SweetPea
## Min. :0.00000 Min. : 0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median : 0.000 Median :0.0000 Median :0.000
## Mean :0.02083 Mean : 0.625 Mean :0.1458 Mean :0.375
## 3rd Qu.:0.00000 3rd Qu.: 0.000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.00000 Max. :19.000 Max. :5.0000 Max. :5.000
##
## JaumeaCarnosa ShorePine FowlersKnotweed QueenLace
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0
## Mean :0.8125 Mean :0.3125 Mean :0.02083 Mean :0
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0
## Max. :8.0000 Max. :7.0000 Max. :1.00000 Max. :0
##
## BlackMedick BindWeed Stickyweed TuftedHairGrass Dodder
## Min. :0 Min. :0 Min. :0 Min. :0.00000 Min. :0
## 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0
## Median :0 Median :0 Median :0 Median :0.00000 Median :0
## Mean :0 Mean :0 Mean :0 Mean :0.04167 Mean :0
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0
## Max. :0 Max. :0 Max. :0 Max. :2.00000 Max. :0
##
## BlackLocust OsoBerry DouglasFir NoddingSemaphore
## Min. : 0.0000 Min. : 0.0000 Min. :0.0000 Min. :0
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0
## Median : 0.0000 Median : 0.0000 Median :0.0000 Median :0
## Mean : 0.2083 Mean : 0.3333 Mean :0.1042 Mean :0
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.:0
## Max. :10.0000 Max. :14.0000 Max. :5.0000 Max. :0
##
## Yarrow
## Min. :0
## 1st Qu.:0
## Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
##
attach(alldata)
op=par(mfrow=c(1,1))
alldataPCA = prcomp(alldata[,c(9:40,43:65,70:70,72:74)], scale=T, center=T)
summary(alldataPCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.4277 2.25428 2.03388 1.98576 1.87275 1.76461
## Proportion of Variance 0.0999 0.08613 0.07011 0.06683 0.05944 0.05278
## Cumulative Proportion 0.0999 0.18603 0.25614 0.32298 0.38242 0.43520
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 1.74036 1.67132 1.58145 1.56885 1.45198 1.43265
## Proportion of Variance 0.05134 0.04734 0.04239 0.04172 0.03573 0.03479
## Cumulative Proportion 0.48653 0.53388 0.57627 0.61798 0.65372 0.68850
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 1.33848 1.32339 1.27801 1.24861 1.17939 1.11719
## Proportion of Variance 0.03036 0.02968 0.02768 0.02642 0.02358 0.02115
## Cumulative Proportion 0.71887 0.74855 0.77624 0.80266 0.82624 0.84739
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 1.05634 1.02548 0.99574 0.91553 0.82834 0.77418
## Proportion of Variance 0.01891 0.01782 0.01681 0.01421 0.01163 0.01016
## Cumulative Proportion 0.86630 0.88413 0.90093 0.91514 0.92677 0.93693
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 0.75035 0.72528 0.69389 0.60750 0.58858 0.56161
## Proportion of Variance 0.00954 0.00892 0.00816 0.00626 0.00587 0.00535
## Cumulative Proportion 0.94647 0.95539 0.96355 0.96980 0.97567 0.98102
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 0.54105 0.46956 0.41237 0.35939 0.32950 0.30509
## Proportion of Variance 0.00496 0.00374 0.00288 0.00219 0.00184 0.00158
## Cumulative Proportion 0.98598 0.98972 0.99260 0.99479 0.99663 0.99821
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 0.22588 0.14092 0.11774 0.08377 0.07008 0.06942
## Proportion of Variance 0.00086 0.00034 0.00023 0.00012 0.00008 0.00008
## Cumulative Proportion 0.99907 0.99941 0.99964 0.99976 0.99985 0.99993
## PC43 PC44 PC45 PC46 PC47 PC48
## Standard deviation 0.05058 0.03394 0.02042 0.01138 0.00326 1.979e-16
## Proportion of Variance 0.00004 0.00002 0.00001 0.00000 0.00000 0.000e+00
## Cumulative Proportion 0.99997 0.99999 1.00000 1.00000 1.00000 1.000e+00
attach(alldataPCA)
PCA.scores = data.frame(alldata$Site, alldata$div, round(alldataPCA$x, 3))
#write.table(PCA.scores, "alldataPCA.csv", quote=F, row.names=F, col.names=T, sep=",")
newdata = read.table("alldataPCA.csv", T, sep=",")
attach(newdata)
distances = dist(newdata[, c(3:23)], method="euclidean") #PCA Scores 1-21##
eward=hclust(distances, method="ward.D2")
plot(eward, labels=div, hang=0, cex=0.65, xlab="", sub="", main="PC1-21", ylab="Euclidean Distance")
plot(eward, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-21", ylab="Euclidean Distance")
HCgroups.div = cutree(eward, 4)
table(HCgroups.div, div)
## div
## HCgroups.div high low med
## 1 14 14 9
## 2 0 0 1
## 3 1 2 6
## 4 1 0 0
chisq.test(HCgroups.div, div)
## Warning in chisq.test(HCgroups.div, div): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups.div and div
## X-squared = 10.018, df = 6, p-value = 0.1239
HCgroups.site = cutree(eward, 3)
table(HCgroups.site, Site)
## Site
## HCgroups.site Beaver Fishpark Harper Stillwater
## 1 2 12 11 12
## 2 1 0 1 0
## 3 9 0 0 0
chisq.test(HCgroups.site, Site)
## Warning in chisq.test(HCgroups.site, Site): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups.site and Site
## X-squared = 36.649, df = 6, p-value = 2.062e-06
HCgroups = cutree(eward, 4)
table(HCgroups, Site)
## Site
## HCgroups Beaver Fishpark Harper Stillwater
## 1 2 12 11 12
## 2 0 0 1 0
## 3 9 0 0 0
## 4 1 0 0 0
chisq.test(HCgroups, Site)
## Warning in chisq.test(HCgroups, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups and Site
## X-squared = 40.649, df = 9, p-value = 5.795e-06
distances = dist(newdata[, c(3:18)], method="euclidean")
eward=hclust(distances, method="ward.D2")
plot(eward, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-16 (80%)", ylab="Euclidean Distance")
HCgroups = cutree(eward, 3)
table(HCgroups, Site)
## Site
## HCgroups Beaver Fishpark Harper Stillwater
## 1 5 12 11 12
## 2 1 0 1 0
## 3 6 0 0 0
chisq.test(HCgroups, Site)
## Warning in chisq.test(HCgroups, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups and Site
## X-squared = 23.4, df = 6, p-value = 0.000673
distances1 = dist(newdata[, c(3:15)], method="euclidean")
eward1=hclust(distances1, method="ward.D2")
plot(eward1, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-13 (72%)", ylab="Euclidean Distance")
HCgroups1 = cutree(eward1, 3)
table(HCgroups1, Site)
## Site
## HCgroups1 Beaver Fishpark Harper Stillwater
## 1 5 12 11 12
## 2 1 0 1 0
## 3 6 0 0 0
chisq.test(HCgroups1, Site)
## Warning in chisq.test(HCgroups1, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups1 and Site
## X-squared = 23.4, df = 6, p-value = 0.000673
distances2 = dist(newdata[, c(3:12)], method="euclidean")
eward2=hclust(distances2, method="ward.D2")
plot(eward2, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-10 (62%)", ylab="Euclidean Distance")
HCgroups2 = cutree(eward2, 3)
table(HCgroups2, Site)
## Site
## HCgroups2 Beaver Fishpark Harper Stillwater
## 1 5 12 11 12
## 2 1 0 1 0
## 3 6 0 0 0
chisq.test(HCgroups2, Site)
## Warning in chisq.test(HCgroups2, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups2 and Site
## X-squared = 23.4, df = 6, p-value = 0.000673
distances3 = dist(newdata[, c(3:10)], method="euclidean")
eward3=hclust(distances3, method="ward.D2")
plot(eward3, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-8 (53%)", ylab="Euclidean Distance")
HCgroups3 = cutree(eward3, 3)
table(HCgroups3, Site)
## Site
## HCgroups3 Beaver Fishpark Harper Stillwater
## 1 2 12 11 12
## 2 1 0 1 0
## 3 9 0 0 0
chisq.test(HCgroups3, Site)
## Warning in chisq.test(HCgroups3, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups3 and Site
## X-squared = 36.649, df = 6, p-value = 2.062e-06
distances4 = dist(newdata[, c(3:9)], method="euclidean")
eward4=hclust(distances4, method="ward.D2")
plot(eward4, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-7 (49%)", ylab="Euclidean Distance")
HCgroups4 = cutree(eward4, 3)
table(HCgroups4, Site)
## Site
## HCgroups4 Beaver Fishpark Harper Stillwater
## 1 2 12 11 12
## 2 1 0 1 0
## 3 9 0 0 0
chisq.test(HCgroups4, Site)
## Warning in chisq.test(HCgroups4, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups4 and Site
## X-squared = 36.649, df = 6, p-value = 2.062e-06
distances5 = dist(newdata[, c(3:7)], method="euclidean")
eward5=hclust(distances5, method="ward.D2")
plot(eward5, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-5 (38%)", ylab="Euclidean Distance")
HCgroups5 = cutree(eward5, 3)
table(HCgroups5, Site)
## Site
## HCgroups5 Beaver Fishpark Harper Stillwater
## 1 2 12 11 12
## 2 1 0 1 0
## 3 9 0 0 0
chisq.test(HCgroups5, Site)
## Warning in chisq.test(HCgroups5, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups5 and Site
## X-squared = 36.649, df = 6, p-value = 2.062e-06
# I think clustering on principal components 1 - 5 yields the most meaningful results, since Beaver clusters mostly by itself. There is one misclassification from Harper in that cluster - and I know which sample it is, since Harper had a very distinct outlier.
distances6 = dist(newdata[, c(3:5)], method="euclidean")
eward6=hclust(distances6, method="ward.D2")
plot(eward6, labels=Site, hang=0, cex=0.65, xlab="", sub="", main="PC1-3 (26%)", ylab="Euclidean Distance")
HCgroups6 = cutree(eward6, 3)
table(HCgroups6, Site)
## Site
## HCgroups6 Beaver Fishpark Harper Stillwater
## 1 5 12 11 12
## 2 1 0 1 0
## 3 6 0 0 0
chisq.test(HCgroups6, Site)
## Warning in chisq.test(HCgroups6, Site): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: HCgroups6 and Site
## X-squared = 23.4, df = 6, p-value = 0.000673
op = par(mfrow=c(1,1))
species.prcomp <- prcomp(alldata[,c(9:40,43:65,70:70,72:74)], scale=T, center=T)
species.prcomp$rotation
species.prcomp$x
plot(species.prcomp$x,
main="Principal Components Ordination of Vegetation by Site",
pch=c(21,22,23,24)[unclass(alldata$Site)],
bg=c("pink", "violet", "purple", "yellow")[unclass(alldata$Site)],
cex=1.5)
abline(h=0); abline(v=0)
legend(x="topright", c("Harper", "Stillwater", "Beaver", "Fishpark"),
pch=c(21, 22, 23, 24), pt.bg=c("pink", "violet", "purple", "yellow"),
bty="n", cex=1)
plot(species.prcomp$x[,c(2:3)],
main="Principal Components Ordination of Vegetation by Site",
pch=c(21,22,23,24)[unclass(alldata$Site)],
bg=c("pink", "violet", "purple", "yellow")[unclass(alldata$Site)],cex=1.5)
abline(h=0); abline(v=0)
legend(x="topright", c("Harper", "Stillwater", "Beaver", "Fishpark"),
pch=c(21, 22, 23, 24), pt.bg=c("pink", "violet", "purple", "yellow"),
bty="n", cex=1)