climate <- read.csv("NASAUnderstory.csv", header = TRUE)
str(climate)
## 'data.frame': 63 obs. of 32 variables:
## $ PlotID : int 2 3 12 14 15 16 18 19 20 21 ...
## $ Overstory.Species: Factor w/ 2 levels "Aspen","Spruce": 1 2 1 1 1 2 1 1 2 2 ...
## $ SPHA : int 68 0 60 16 68 0 62 62 0 0 ...
## $ BLIT : int 14 6 1 14 7 14 7 5 5 16 ...
## $ ASMA : int 0 18 0 0 0 14 0 0 8 26 ...
## $ MOSS : int 3 2 0 72 30 3 3 0 14 5 ...
## $ LEGR : int 33 0 5 14 27 0 6 6 0 0 ...
## $ CHCA : int 5 0 9 1 8 0 12 28 0 0 ...
## $ GRAS : int 5 5 12 2 4 2 4 1 4 1 ...
## $ SEDG : int 0 0 1 0 0 0 0 0 6 0 ...
## $ SMTR : int 14 0 14 13 12 0 0 0 0 0 ...
## $ PTAQ : int 0 2 0 0 0 1 0 0 14 18 ...
## $ COCA : int 0 0 0 0 0 0 0 0 10 6 ...
## $ VAAN : int 4 0 0 5 5 0 0 0 3 10 ...
## $ GAHI : int 28 0 2 7 13 0 0 0 0 0 ...
## $ ARNU : int 0 7 0 0 0 2 0 0 0 10 ...
## $ LYOB : int 0 0 0 0 0 1 0 0 6 11 ...
## $ PIMA : int 1 0 3 2 3 0 13 15 0 0 ...
## $ RUBU : int 0 6 0 0 0 1 0 0 0 3 ...
## $ VAOX : int 6 0 4 1 3 0 5 5 0 0 ...
## $ ACSP : int 0 2 0 0 0 17 0 0 1 1 ...
## $ COCO : int 0 1 0 0 0 6 0 0 8 5 ...
## $ ACRU : int 0 2 0 0 0 10 0 0 1 5 ...
## $ TRBO : int 0 2 0 0 0 1 0 0 1 4 ...
## $ MACA : int 0 2 0 0 0 0 0 0 0 4 ...
## $ CLBO : int 0 1 0 0 0 3 0 0 3 2 ...
## $ STRO : int 0 1 0 0 0 4 0 0 0 0 ...
## $ FUNG : int 1 1 4 0 4 1 1 0 0 0 ...
## $ DILO : int 0 3 0 0 0 1 0 0 0 1 ...
## $ ERIO : int 0 0 3 0 0 0 11 10 0 0 ...
## $ GATR : int 0 4 0 0 0 2 0 0 0 0 ...
## $ Labels : Factor w/ 30 levels "","Bedstraw (Narrow Leaves)",..: 26 8 3 23 18 19 15 24 5 6 ...
summary(climate)
## PlotID Overstory.Species SPHA BLIT
## Min. : 2.00 Aspen :30 Min. : 0.00 Min. : 0.0
## 1st Qu.: 44.00 Spruce:33 1st Qu.: 0.00 1st Qu.: 8.0
## Median : 69.00 Median : 2.00 Median :14.0
## Mean : 63.49 Mean :28.65 Mean :16.7
## 3rd Qu.: 87.50 3rd Qu.:61.00 3rd Qu.:24.0
## Max. :105.00 Max. :86.00 Max. :46.0
##
## ASMA MOSS LEGR CHCA
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 3.000 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 2.000 Median : 5.000 Median : 0.00 Median : 0.000
## Mean : 9.714 Mean : 9.048 Mean : 6.73 Mean : 3.714
## 3rd Qu.:16.500 3rd Qu.:10.000 3rd Qu.: 8.50 3rd Qu.: 5.000
## Max. :48.000 Max. :72.000 Max. :36.00 Max. :28.000
##
## GRAS SEDG SMTR PTAQ
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 4.000 Median : 1.000 Median : 0.000 Median : 0.000
## Mean : 3.714 Mean : 3.619 Mean : 2.444 Mean : 2.317
## 3rd Qu.: 5.000 3rd Qu.: 2.500 3rd Qu.: 3.500 3rd Qu.: 2.500
## Max. :17.000 Max. :34.000 Max. :20.000 Max. :20.000
##
## COCA VAAN GAHI ARNU
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 1.000 Median : 0.000 Median : 0.000
## Mean : 2.175 Mean : 2.095 Mean : 1.984 Mean : 1.952
## 3rd Qu.: 4.000 3rd Qu.: 4.000 3rd Qu.: 3.000 3rd Qu.: 4.000
## Max. :17.000 Max. :10.000 Max. :28.000 Max. :14.000
##
## LYOB PIMA RUBU VAOX
## Min. : 0.000 Min. : 0.000 Min. :0.00 Min. :0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.:0.00 1st Qu.:0.000
## Median : 0.000 Median : 0.000 Median :0.00 Median :0.000
## Mean : 1.794 Mean : 1.746 Mean :1.46 Mean :1.444
## 3rd Qu.: 3.000 3rd Qu.: 2.500 3rd Qu.:3.00 3rd Qu.:3.000
## Max. :11.000 Max. :15.000 Max. :8.00 Max. :6.000
##
## ACSP COCO ACRU TRBO
## Min. : 0.000 Min. :0.000 Min. : 0.000 Min. :0.000
## 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.:0.000
## Median : 0.000 Median :0.000 Median : 0.000 Median :0.000
## Mean : 1.413 Mean :1.413 Mean : 1.365 Mean :1.238
## 3rd Qu.: 1.000 3rd Qu.:2.000 3rd Qu.: 2.000 3rd Qu.:2.000
## Max. :17.000 Max. :9.000 Max. :10.000 Max. :5.000
##
## MACA CLBO STRO FUNG
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.000 Median :0.0000
## Mean :1.206 Mean :1.143 Mean :1.048 Mean :0.9683
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :6.000 Max. :6.000 Max. :5.000 Max. :5.0000
##
## DILO ERIO GATR
## Min. :0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 0.0000 Median :0.0000
## Mean :0.9048 Mean : 0.8889 Mean :0.8095
## 3rd Qu.:1.0000 3rd Qu.: 0.0000 3rd Qu.:1.0000
## Max. :8.0000 Max. :24.0000 Max. :4.0000
##
## Labels
## :34
## Bedstraw (Narrow Leaves): 1
## Big-leaved Aster : 1
## Blue-bead Lily : 1
## Bog False Solomon Seal : 1
## Bracken Fern : 1
## (Other) :24
climate.pca <- climate[3:13]
str(climate.pca)
## 'data.frame': 63 obs. of 11 variables:
## $ SPHA: int 68 0 60 16 68 0 62 62 0 0 ...
## $ BLIT: int 14 6 1 14 7 14 7 5 5 16 ...
## $ ASMA: int 0 18 0 0 0 14 0 0 8 26 ...
## $ MOSS: int 3 2 0 72 30 3 3 0 14 5 ...
## $ LEGR: int 33 0 5 14 27 0 6 6 0 0 ...
## $ CHCA: int 5 0 9 1 8 0 12 28 0 0 ...
## $ GRAS: int 5 5 12 2 4 2 4 1 4 1 ...
## $ SEDG: int 0 0 1 0 0 0 0 0 6 0 ...
## $ SMTR: int 14 0 14 13 12 0 0 0 0 0 ...
## $ PTAQ: int 0 2 0 0 0 1 0 0 14 18 ...
## $ COCA: int 0 0 0 0 0 0 0 0 10 6 ...
library(corrplot) #correlation plot
library(FactoMineR) #additional PCA analysis
library(ggplot2) #support scatterplot
library(GPArotation) #supports rotation
library(psych) #PCA package
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
pca.df = as.data.frame(lapply(climate.pca , as.numeric))
str(pca.df)
## 'data.frame': 63 obs. of 11 variables:
## $ SPHA: num 68 0 60 16 68 0 62 62 0 0 ...
## $ BLIT: num 14 6 1 14 7 14 7 5 5 16 ...
## $ ASMA: num 0 18 0 0 0 14 0 0 8 26 ...
## $ MOSS: num 3 2 0 72 30 3 3 0 14 5 ...
## $ LEGR: num 33 0 5 14 27 0 6 6 0 0 ...
## $ CHCA: num 5 0 9 1 8 0 12 28 0 0 ...
## $ GRAS: num 5 5 12 2 4 2 4 1 4 1 ...
## $ SEDG: num 0 0 1 0 0 0 0 0 6 0 ...
## $ SMTR: num 14 0 14 13 12 0 0 0 0 0 ...
## $ PTAQ: num 0 2 0 0 0 1 0 0 14 18 ...
## $ COCA: num 0 0 0 0 0 0 0 0 10 6 ...
climate.cor = cor(pca.df)
climate.cor
## SPHA BLIT ASMA MOSS LEGR
## SPHA 1.0000000 -0.5864081 -0.72062426 0.124570398 0.70679705
## BLIT -0.5864081 1.0000000 0.29235971 -0.102323955 -0.44056347
## ASMA -0.7206243 0.2923597 1.00000000 -0.312032088 -0.50488086
## MOSS 0.1245704 -0.1023240 -0.31203209 1.000000000 0.30140755
## LEGR 0.7067971 -0.4405635 -0.50488086 0.301407554 1.00000000
## CHCA 0.6471222 -0.5468535 -0.43789936 -0.118197447 0.43763540
## GRAS 0.1459794 -0.2318856 0.02902742 -0.154977268 0.11862005
## SEDG 0.3948176 -0.3409134 -0.31500679 0.004050561 0.02280618
## SMTR 0.4948647 -0.2859442 -0.43409381 0.434961499 0.47030932
## PTAQ -0.4644532 0.0814870 0.54833695 -0.177306278 -0.32568366
## COCA -0.5727870 0.2859041 0.30578500 -0.093797842 -0.39140393
## CHCA GRAS SEDG SMTR PTAQ
## SPHA 0.64712217 0.14597942 0.394817616 0.49486474 -0.46445318
## BLIT -0.54685346 -0.23188560 -0.340913352 -0.28594421 0.08148700
## ASMA -0.43789936 0.02902742 -0.315006793 -0.43409381 0.54833695
## MOSS -0.11819745 -0.15497727 0.004050561 0.43496150 -0.17730628
## LEGR 0.43763540 0.11862005 0.022806176 0.47030932 -0.32568366
## CHCA 1.00000000 0.09652412 0.387791822 0.08712632 -0.28139831
## GRAS 0.09652412 1.00000000 -0.270929392 0.05968249 -0.07908224
## SEDG 0.38779182 -0.27092939 1.000000000 0.20951973 -0.16144737
## SMTR 0.08712632 0.05968249 0.209519733 1.00000000 -0.27861933
## PTAQ -0.28139831 -0.07908224 -0.161447367 -0.27861933 1.00000000
## COCA -0.37854163 -0.08575924 -0.212353610 -0.32627244 0.38300912
## COCA
## SPHA -0.57278704
## BLIT 0.28590411
## ASMA 0.30578500
## MOSS -0.09379784
## LEGR -0.39140393
## CHCA -0.37854163
## GRAS -0.08575924
## SEDG -0.21235361
## SMTR -0.32627244
## PTAQ 0.38300912
## COCA 1.00000000
corrplot(climate.cor, method="ellipse")

pca = principal(pca.df, nfactors=3, rotate="none")
pca
## Principal Components Analysis
## Call: principal(r = pca.df, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## SPHA 0.93 -0.13 0.02 0.89 0.11 1.0
## BLIT -0.64 0.37 -0.07 0.54 0.46 1.6
## ASMA -0.77 -0.21 0.13 0.66 0.34 1.2
## MOSS 0.30 0.78 0.02 0.69 0.31 1.3
## LEGR 0.75 0.13 0.29 0.67 0.33 1.4
## CHCA 0.68 -0.53 -0.14 0.76 0.24 2.0
## GRAS 0.12 -0.34 0.80 0.77 0.23 1.4
## SEDG 0.44 -0.20 -0.73 0.76 0.24 1.8
## SMTR 0.61 0.47 0.11 0.60 0.40 2.0
## PTAQ -0.58 -0.19 -0.03 0.37 0.63 1.2
## COCA -0.63 0.07 -0.06 0.40 0.60 1.0
##
## PC1 PC2 PC3
## SS loadings 4.27 1.51 1.32
## Proportion Var 0.39 0.14 0.12
## Cumulative Var 0.39 0.53 0.65
## Proportion Explained 0.60 0.21 0.19
## Cumulative Proportion 0.60 0.81 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
## with the empirical chi square 54.78 with prob < 0.00053
##
## Fit based upon off diagonal values = 0.94
plot(pca$values, type="b", ylab="Eigenvalues", xlab="Component")

pca.rotate = principal(pca.df, nfactors=3, rotate = "varimax")
pca.rotate
## Principal Components Analysis
## Call: principal(r = pca.df, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## SPHA 0.83 0.45 -0.01 0.89 0.11 1.5
## BLIT -0.72 -0.10 -0.09 0.54 0.46 1.1
## ASMA -0.50 -0.61 0.19 0.66 0.34 2.2
## MOSS -0.22 0.79 -0.10 0.69 0.31 1.2
## LEGR 0.51 0.59 0.23 0.67 0.33 2.3
## CHCA 0.86 -0.04 -0.09 0.76 0.24 1.0
## GRAS 0.26 -0.08 0.83 0.77 0.23 1.2
## SEDG 0.50 -0.01 -0.71 0.76 0.24 1.8
## SMTR 0.20 0.75 0.01 0.60 0.40 1.1
## PTAQ -0.35 -0.50 0.03 0.37 0.63 1.8
## COCA -0.54 -0.32 -0.04 0.40 0.60 1.6
##
## RC1 RC2 RC3
## SS loadings 3.30 2.48 1.33
## Proportion Var 0.30 0.23 0.12
## Cumulative Var 0.30 0.52 0.65
## Proportion Explained 0.46 0.35 0.19
## Cumulative Proportion 0.46 0.81 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
## with the empirical chi square 54.78 with prob < 0.00053
##
## Fit based upon off diagonal values = 0.94
pca.scores = pca.rotate$scores
pca.scores = as.data.frame(pca.scores)
pca.scores
## RC1 RC2 RC3
## 1 0.6746270655 1.52571290 1.08125879
## 2 -0.0005187963 -0.67710138 0.58751345
## 3 1.2505676033 0.44738160 1.80080885
## 4 -1.5871923206 3.75027363 -0.16914394
## 5 0.3783449500 2.14098082 0.65842418
## 6 -0.2912059263 -0.40191019 -0.05841779
## 7 1.1369877401 -0.35179497 0.25595647
## 8 1.9507719628 -0.89503434 -0.40385762
## 9 -0.5899014698 -0.80529802 -0.30836841
## 10 -0.8663244944 -1.26773374 -0.21470331
## 11 -0.8370119007 -1.13165113 0.33100881
## 12 1.2554953666 0.71307103 1.33000712
## 13 -0.0582159826 2.14933954 0.82217709
## 14 0.2221669566 0.55861023 0.14879868
## 15 -0.7088492746 1.79711070 -0.79649374
## 16 -0.4007935838 2.05922844 -0.12693704
## 17 0.1970853748 0.46148099 -1.22830671
## 18 1.2623408447 0.98803856 -2.38037273
## 19 0.3191284524 1.04322429 1.29745574
## 20 0.6283101258 0.08729548 -0.55051953
## 21 -0.6753065576 1.68566995 -0.59700857
## 22 1.6691854324 0.22258563 0.26500776
## 23 1.9531761336 0.12515625 3.18313043
## 24 0.9257286632 0.46899267 0.76965843
## 25 -0.6113630273 1.13742305 -0.76805936
## 26 0.7853430147 0.74153031 -0.26351473
## 27 1.9332969999 -0.48405144 -1.32716327
## 28 2.1668620428 -0.65321362 2.12745775
## 29 1.2330934295 -0.35489338 0.73512618
## 30 2.5117466563 -1.15825535 -3.00766316
## 31 0.7391614691 0.02437610 -1.01714445
## 32 -1.0992510155 -0.59979284 -0.34533100
## 33 -0.6703757886 -0.54553667 -0.30361429
## 34 -0.9414885522 -0.21692934 -0.43013890
## 35 -0.8974621120 -0.36225594 0.17879402
## 36 -0.5321911654 -0.40457463 0.26709428
## 37 -0.4923353417 -0.36807193 0.47963932
## 38 -1.2050412414 0.17253275 -0.55241275
## 39 -0.7112278489 -0.66773767 -0.07696602
## 40 -0.0005187963 -0.67710138 0.58751345
## 41 -0.7630744286 -0.44970109 0.33256080
## 42 -0.6064997041 -0.90348074 0.74707358
## 43 -0.5006733806 -0.45345396 0.40721421
## 44 -0.7351537859 -1.21643199 -0.28078065
## 45 -0.8335117101 -0.59151860 0.32848724
## 46 -0.6408058601 -0.51407868 0.30369686
## 47 -0.6255011437 -0.05688314 -0.06960956
## 48 -1.1023861430 -0.13679733 -0.41148168
## 49 -1.1452247129 0.31335124 -0.60201579
## 50 -0.6175372206 -0.29621244 0.26012667
## 51 -0.7580922954 -0.31509523 0.26227034
## 52 -0.5931202721 -0.51613988 0.16898898
## 53 -0.3699647673 -1.11760080 0.52827934
## 54 -0.3358055646 -0.83530333 0.93557378
## 55 -0.9972653950 -0.70206891 0.03211676
## 56 -0.5266589205 -1.62765331 0.17983169
## 57 -0.4235953093 -1.05265948 0.36395095
## 58 -1.0962057864 -0.69135822 0.22011849
## 59 0.4042507987 0.13527944 -2.01746563
## 60 -0.6339547011 0.40752078 -0.79628120
## 61 0.6798212632 0.18516287 0.26253607
## 62 1.7866102311 -0.46008745 -2.28902015
## 63 0.4175037205 0.61813327 -0.84686458
climate$SPHA = as.numeric(climate$SPHA)
climate$PC1 = pca.scores$RC1
climate$PC2 = pca.scores$RC2
climate$PC3= pca.scores$RC3
#Pincipal Components Regression
climate.lm = lm(SPHA~PC1+PC2+PC3, data=climate)
summary(climate.lm)
##
## Call:
## lm(formula = SPHA ~ PC1 + PC2 + PC3, data = climate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.6240 -6.3251 -0.2731 7.5362 29.8318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.651 1.359 21.075 < 2e-16 ***
## PC1 25.699 1.370 18.754 < 2e-16 ***
## PC2 14.056 1.370 10.257 9.85e-15 ***
## PC3 -0.295 1.370 -0.215 0.83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.79 on 59 degrees of freedom
## Multiple R-squared: 0.8856, Adjusted R-squared: 0.8798
## F-statistic: 152.3 on 3 and 59 DF, p-value: < 2.2e-16
climate.lm2 = lm(SPHA~PC1+PC2, data=climate)
summary(climate.lm2)
##
## Call:
## lm(formula = SPHA ~ PC1 + PC2, data = climate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.5741 -6.6156 -0.4673 7.5511 29.7879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.651 1.349 21.25 < 2e-16 ***
## PC1 25.699 1.359 18.91 < 2e-16 ***
## PC2 14.056 1.359 10.34 5.95e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.7 on 60 degrees of freedom
## Multiple R-squared: 0.8856, Adjusted R-squared: 0.8817
## F-statistic: 232.1 on 2 and 60 DF, p-value: < 2.2e-16
plot(climate.lm2$fitted.values,
climate$SPHA, main="Predicted versus Actual",xlab="Predicted",ylab="Actual")

climate$pred = climate.lm2$fitted.values
climate=climate[order(-climate$SPHA),]
climate.best = climate[1:15,]
p2 = ggplot(climate, aes(x=PC1, y=PC2, label = Labels))
p2 + geom_point() +
geom_text(size=3, hjust=.2, vjust=-0.5, angle=0) +
xlim(-3,3) + ylim(-3,3)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
