Prev.Pos.Dogs.T1 = Prevalence of positive Dog Serology from original data at time point 1
Prev.Pos.Dogs.T3 = Prevalence of positive Dog Serology from original data at time point 3
Inc.MST2 = Incidence of positive Human MST from original data
Mean.Max.Saliva = I calculated the maximum saliva test for each dog and found the average of these maximum scores from each village
Mean.Saliva = I averaged the saliva scores for all tests taken from each village
Prev = The saliva dataset also had serology tests for the dogs so I calculated the prevalence from this
Saliva.Prevlence = If the saliva score was above 916 it counts as positive and below 916 counted as negative. I used all the tests to calculate the saliva prevelence in each village
InfectionValuesdf <- read.csv('InfectionValuesdf.csv')
row.names(InfectionValuesdf) <- InfectionValuesdf$Group
InfectionValuesdf <- InfectionValuesdf[-1]
InfectionValueslist.imp <-imputePCA(InfectionValuesdf)
InfectionValuesdf.imp <- as.data.frame(InfectionValueslist.imp[[1]])
InfectionValuesdf <- InfectionValuesdf[,1:7]
InfectionValuesdf <- subset(InfectionValuesdf, !is.na(InfectionValuesdf$Mean.Saliva))
InfectionValuesdf
## Prev.Pos.Dogs.T1 Prev.Pos.Dogs.T3 Inc.MST2 Mean.Max.Saliva Mean.Saliva
## BCVN 0.7777778 0.9333333 0.06206897 12188.260 7068.542
## BMCA 0.9090909 0.6842105 0.04395604 18520.143 10038.423
## BVSV 0.5833333 0.7250000 0.03900709 6626.440 4106.023
## MGROS 0.5555556 0.8400000 0.01276596 2543.100 2543.100
## MQBA 0.5384615 0.6304348 0.06845238 9086.183 4540.988
## PABE 0.8000000 0.8421053 0.05128205 17818.580 12414.814
## PD 0.7894737 0.7000000 0.05442177 9657.033 6450.597
## PF 0.6666667 0.6666667 0.04687500 9708.000 5116.465
## Prev Saliva.Prevlence
## BCVN 0.7368421 0.7600000
## BMCA 0.6153846 0.7647059
## BVSV 0.4545455 0.6875000
## MGROS 0.0000000 0.1111111
## MQBA 0.5588235 0.8947368
## PABE 0.5714286 0.6666667
## PD 0.5714286 0.6250000
## PF 0.7058824 0.7083333
EnviroVardf <- read.csv('EnviroVardf.csv')
row.names(EnviroVardf) <- EnviroVardf$Group
EnviroVardf <- EnviroVardf[-1]
EnviroVardf
## Annual.Precipitation Wettest Driest Seasonal Tmin Tmax Precip
## ABCHAC 2545 465 23 74 233.6667 311.8333 212.0833
## BCVN 2712 513 19 79 236.9167 311.1667 226.0000
## BMCA 2565 472 21 75 234.1667 311.8333 213.7500
## BVSV 2675 502 22 78 236.1667 311.4167 222.9167
## CD 2533 463 21 74 233.8333 312.0000 211.0833
## CL 2804 536 16 80 238.0833 311.0833 233.6667
## CM 2506 457 21 75 234.0833 312.4167 208.8333
## FDR 2484 448 23 73 233.0000 312.4167 207.0000
## JUCACH 2492 451 22 73 233.0833 312.2500 207.6667
## MARU 2549 467 22 75 234.0833 311.9167 212.4167
## MGROS 2548 469 20 75 235.0000 312.0000 212.3333
## MQBA 2751 522 17 79 237.5833 311.0833 229.2500
## PABE 2608 484 21 76 235.5000 311.9167 217.3333
## PD 2595 480 22 76 234.4167 311.5000 216.2500
## PF 2742 520 17 79 237.2500 311.1667 228.5000
## SM 2506 455 21 74 233.3333 311.8333 208.8333
## VC 2634 491 23 77 235.0000 311.3333 219.5000
## VUPD 2586 477 22 75 234.4167 311.6667 215.5000
## NDVIMean NDVIMedian NDVIMax NDVIMin LSTMean LSTMedian LSTMax
## ABCHAC 71171480 72124656 83807144 39128224 300.5819 300.8271 303.0098
## BCVN 68978590 74344300 88563360 23014351 301.3609 301.3182 302.7504
## BMCA 76365818 81642252 89359640 22570000 301.6482 301.5077 303.9783
## BVSV 66396028 71211897 80306307 35999860 301.2537 301.3463 302.7621
## CD 67193928 70285952 85833000 33270751 301.3924 301.0569 303.1886
## CL 61414923 63564123 77102720 21825432 300.6765 300.5820 302.5384
## CM 71262930 75586501 85088944 21250836 302.0870 302.1326 303.5665
## FDR 59531105 67846856 83440456 10942584 299.9074 299.8832 301.1968
## JUCACH 76857983 83674620 90319288 11576548 300.0859 300.0298 302.0617
## MARU 68922772 76347064 87785160 14878260 301.0485 301.0889 302.8358
## MGROS 59298448 64433116 78529904 12650532 303.4854 303.2383 305.1004
## MQBA 68256453 73206280 82936463 31080920 301.2487 301.2910 303.3614
## PABE 57434785 55752292 83119800 24896600 302.8574 302.5098 305.1974
## PD 74303323 76214741 83793944 43079578 300.5899 301.1789 302.7252
## PF 72026454 75880300 86175296 34324358 301.3940 301.3448 303.0893
## SM 67031112 73060000 83226112 13844195 300.1174 300.0407 301.8558
## VC 74922352 76383299 83954072 51465236 301.0097 300.9769 303.6098
## VUPD 68107798 70802201 79956392 17223452 301.1254 301.6835 303.3354
## LSTMin EvapoMean EvapoMedian EvapoMax EvapoMin
## ABCHAC 296.1200 31.48407 30.30290 50.20264 14.176617
## BCVN 299.6700 34.04578 32.61080 59.02321 13.396488
## BMCA 300.3395 40.50005 39.14014 71.94923 15.479762
## BVSV 300.0952 32.44249 29.98976 51.49193 10.128520
## CD 299.6663 28.59371 27.74046 52.54673 8.543652
## CL 298.4493 1479.88998 1479.22331 1488.55842 1472.834867
## CM 300.4409 34.06431 31.96171 54.65224 13.881584
## FDR 299.0244 618.75475 619.07491 632.48888 597.756224
## JUCACH 298.6659 35.28196 36.10414 51.93369 11.716193
## MARU 298.0121 38.78923 38.19121 67.12448 15.347358
## MGROS 301.9213 28.18515 24.99208 49.68626 11.888706
## MQBA 299.2475 255.44454 255.16791 277.84252 236.315641
## PABE 301.4247 26.73978 24.50969 47.55969 11.947536
## PD 293.0273 38.08400 36.30581 58.62634 14.902667
## PF 299.5918 33.13820 31.46552 59.30126 8.860002
## SM 298.2349 3274.23554 3274.23863 3274.24589 3274.221180
## VC 299.6946 33.92231 33.79157 53.97996 13.031496
## VUPD 295.1025 30.39886 31.15986 49.19620 14.207052
Broken stick
One of these procedures consists is computing a broken stick model, which randomly divides a stick of unit length into the same number of pieces as there are PCA eigenvalues. The theoretical equation for the broken stick model is known. The pieces are then put in order of decreasing lengths and compared to the eigenvalues. One interprets only the axes whose eigenvalues are larger than the length of the corresponding piece of the stick.
env.pca <- rda(EnviroVardf, scale = TRUE)
env.pca
## Call: rda(X = EnviroVardf, scale = TRUE)
##
## Inertia Rank
## Total 19
## Unconstrained 19 16
## Inertia is correlations
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13
## 6.936 5.144 3.500 1.356 0.932 0.566 0.245 0.168 0.094 0.045 0.007 0.003 0.003
## PC14 PC15 PC16
## 0.001 0.000 0.000
summary(env.pca) # Default scaling 2
##
## Call:
## rda(X = EnviroVardf, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 19 1
## Unconstrained 19 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 6.9365 5.1436 3.4995 1.35551 0.93222 0.56593 0.24532
## Proportion Explained 0.3651 0.2707 0.1842 0.07134 0.04906 0.02979 0.01291
## Cumulative Proportion 0.3651 0.6358 0.8200 0.89132 0.94039 0.97017 0.98308
## PC8 PC9 PC10 PC11 PC12 PC13
## Eigenvalue 0.168051 0.094069 0.045257 0.0070587 0.0033651 0.0025202
## Proportion Explained 0.008845 0.004951 0.002382 0.0003715 0.0001771 0.0001326
## Cumulative Proportion 0.991927 0.996878 0.999260 0.9996318 0.9998089 0.9999415
## PC14 PC15 PC16
## Eigenvalue 1.095e-03 1.538e-05 7.292e-07
## Proportion Explained 5.762e-05 8.094e-07 3.838e-08
## Cumulative Proportion 1.000e+00 1.000e+00 1.000e+00
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 4.239363
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Annual.Precipitation -0.9291 0.19192 0.19285 0.0002482 0.070519 -0.007338
## Wettest -0.9362 0.17394 0.18126 -0.0102013 0.057368 -0.005666
## Driest 0.7436 -0.30070 0.07543 0.3759915 -0.114385 -0.215023
## Seasonal -0.9276 0.15368 0.16788 -0.0624964 0.005468 -0.029221
## Tmin -0.9530 0.12522 0.03109 -0.0914466 0.092377 0.011680
## Tmax 0.8233 -0.25622 -0.34949 -0.0867789 0.154320 -0.017635
## Precip -0.9291 0.19192 0.19285 0.0002482 0.070519 -0.007338
## NDVIMean 0.1941 -0.17483 0.81137 -0.2300436 -0.364726 0.110572
## NDVIMedian 0.3180 -0.07767 0.76601 -0.3860945 -0.170015 0.143287
## NDVIMax 0.3692 -0.27016 0.51478 -0.5739610 0.041864 -0.105037
## NDVIMin -0.3725 -0.20265 0.46611 0.4048829 -0.440981 -0.395562
## LSTMean -0.3649 -0.58605 -0.60603 -0.1901818 -0.231410 0.067448
## LSTMedian -0.3701 -0.62854 -0.51481 -0.0381138 -0.273125 0.227215
## LSTMax -0.3550 -0.61567 -0.47256 -0.0678009 -0.404251 0.057501
## LSTMin -0.2043 -0.21381 -0.49466 -0.5909345 0.066018 -0.477189
## EvapoMean 0.1640 0.90181 -0.23334 -0.0920573 -0.205243 -0.007130
## EvapoMedian 0.1644 0.90201 -0.23272 -0.0919657 -0.204801 -0.007040
## EvapoMax 0.1633 0.90147 -0.23286 -0.0948165 -0.206434 -0.006687
## EvapoMin 0.1628 0.90146 -0.23564 -0.0906428 -0.205751 -0.006546
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## ABCHAC 0.6711 -0.325274 0.6061 1.5108 -0.44554 -0.29735
## BCVN -1.0519 -0.026000 0.7041 -1.1326 0.81667 0.00376
## BMCA 0.4003 -0.744075 0.4750 -1.6718 -0.98284 0.23451
## BVSV -0.7510 -0.113427 0.1497 0.8159 0.24699 -1.30435
## CD 0.5443 -0.516282 -0.1584 0.1104 0.06318 -1.08185
## CL -1.7901 2.029440 -0.4379 0.3918 0.87411 0.43693
## CM 0.6489 -0.877778 -0.5420 -0.9173 -0.50100 0.38687
## FDR 1.5041 0.631453 -0.4918 0.7461 2.54073 -1.08790
## JUCACH 1.5306 -0.239235 1.1809 -1.2590 1.08296 0.33008
## MARU 0.6533 -0.361026 0.2979 -0.4193 0.70775 0.66163
## MGROS -0.2613 -1.084487 -2.4527 -0.2313 -0.45366 0.92813
## MQBA -1.5341 0.312223 0.4480 -0.4449 0.19744 0.30453
## PABE -0.5694 -0.902448 -2.0549 0.3121 -0.06099 -0.96635
## PD 0.1629 -0.149258 1.3225 1.8159 -0.96521 1.29129
## PF -1.3990 -0.006176 0.8948 -0.9988 0.07998 0.05320
## SM 1.3707 3.036106 -0.8143 -0.6900 -1.78984 -0.05564
## VC -0.3102 -0.397765 0.9807 0.5972 -1.58472 -2.07649
## VUPD 0.1807 -0.265993 -0.1075 1.4647 0.17398 2.23899
summary(env.pca, scaling = 1)
##
## Call:
## rda(X = EnviroVardf, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 19 1
## Unconstrained 19 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 6.9365 5.1436 3.4995 1.35551 0.93222 0.56593 0.24532
## Proportion Explained 0.3651 0.2707 0.1842 0.07134 0.04906 0.02979 0.01291
## Cumulative Proportion 0.3651 0.6358 0.8200 0.89132 0.94039 0.97017 0.98308
## PC8 PC9 PC10 PC11 PC12 PC13
## Eigenvalue 0.168051 0.094069 0.045257 0.0070587 0.0033651 0.0025202
## Proportion Explained 0.008845 0.004951 0.002382 0.0003715 0.0001771 0.0001326
## Cumulative Proportion 0.991927 0.996878 0.999260 0.9996318 0.9998089 0.9999415
## PC14 PC15 PC16
## Eigenvalue 1.095e-03 1.538e-05 7.292e-07
## Proportion Explained 5.762e-05 8.094e-07 3.838e-08
## Cumulative Proportion 1.000e+00 1.000e+00 1.000e+00
##
## Scaling 1 for species and site scores
## * Sites are scaled proportional to eigenvalues
## * Species are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 4.239363
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Annual.Precipitation -1.5377 0.3689 0.44937 0.0009291 0.31837 -0.04252
## Wettest -1.5495 0.3343 0.42235 -0.0381928 0.25899 -0.03283
## Driest 1.2307 -0.5779 0.17575 1.4076787 -0.51640 -1.24589
## Seasonal -1.5353 0.2954 0.39116 -0.2339808 0.02468 -0.16931
## Tmin -1.5772 0.2407 0.07244 -0.3423677 0.41704 0.06768
## Tmax 1.3625 -0.4924 -0.81433 -0.3248923 0.69669 -0.10218
## Precip -1.5377 0.3689 0.44937 0.0009291 0.31837 -0.04252
## NDVIMean 0.3213 -0.3360 1.89056 -0.8612627 -1.64659 0.64068
## NDVIMedian 0.5263 -0.1493 1.78487 -1.4455032 -0.76755 0.83024
## NDVIMax 0.6111 -0.5192 1.19949 -2.1488587 0.18900 -0.60861
## NDVIMin -0.6166 -0.3895 1.08608 1.5158455 -1.99085 -2.29198
## LSTMean -0.6039 -1.1264 -1.41210 -0.7120238 -1.04472 0.39081
## LSTMedian -0.6125 -1.2080 -1.19955 -0.1426947 -1.23305 1.31654
## LSTMax -0.5875 -1.1833 -1.10110 -0.2538403 -1.82503 0.33317
## LSTMin -0.3381 -0.4109 -1.15260 -2.2124058 0.29805 -2.76494
## EvapoMean 0.2714 1.7332 -0.54370 -0.3446543 -0.92659 -0.04131
## EvapoMedian 0.2721 1.7336 -0.54225 -0.3443115 -0.92459 -0.04079
## EvapoMax 0.2702 1.7326 -0.54258 -0.3549843 -0.93196 -0.03875
## EvapoMin 0.2694 1.7326 -0.54907 -0.3393586 -0.92888 -0.03793
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## ABCHAC 0.40549 -0.169241 0.26010 0.40353 -0.09869 -0.051318
## BCVN -0.63555 -0.013528 0.30216 -0.30251 0.18090 0.000649
## BMCA 0.24189 -0.387145 0.20387 -0.44654 -0.21770 0.040473
## BVSV -0.45376 -0.059017 0.06425 0.21794 0.05471 -0.225112
## CD 0.32887 -0.268623 -0.06800 0.02948 0.01400 -0.186712
## CL -1.08162 1.055926 -0.18795 0.10466 0.19362 0.075408
## CM 0.39207 -0.456711 -0.23262 -0.24500 -0.11097 0.066768
## FDR 0.90882 0.328547 -0.21108 0.19929 0.56278 -0.187755
## JUCACH 0.92481 -0.124475 0.50681 -0.33628 0.23988 0.056967
## MARU 0.39474 -0.187843 0.12785 -0.11200 0.15677 0.114189
## MGROS -0.15789 -0.564263 -1.05262 -0.06178 -0.10049 0.160182
## MQBA -0.92691 0.162451 0.19227 -0.11884 0.04373 0.052557
## PABE -0.34406 -0.469547 -0.88192 0.08336 -0.01351 -0.166777
## PD 0.09845 -0.077660 0.56756 0.48502 -0.21380 0.222858
## PF -0.84531 -0.003213 0.38402 -0.26677 0.01772 0.009181
## SM 0.82818 1.579698 -0.34945 -0.18429 -0.39646 -0.009602
## VC -0.18740 -0.206959 0.42089 0.15951 -0.35102 -0.358372
## VUPD 0.10918 -0.138397 -0.04616 0.39121 0.03854 0.386417
# Eigenvalues
ev <- env.pca$CA$eig
# Scree plot and broken stick model
dev.new(title = "Scree plot of PCA eigenvalues", noRStudioGD = TRUE)
screeplot(env.pca, bstick = TRUE, npcs = length(env.pca$CA$eig))
Scaling Types
Scaling 1 : Distances among objects in the biplot are approximations of their Euclidean distances in multidimensional space. (2) The angles among descriptor vectors do not reflect their correlations. If the main interest of the analysis is to interpret the relationships among objects, choose scaling 1.
Scaling 2 : Distances among objects in the biplot are not approximations of their Euclidean distances in multidimensional space. (2) The angles between descriptors in the biplot reflect their correlations.1. If the main interest focuses on the relationships among descriptors, choose scaling 2.
dev.new(width = 12,
height = 6,
title = "PCA biplots - environmental variables - cleanplot.pca",
noRStudioGD = TRUE
)
par(mfrow = c(1, 2))
cleanplot.pca(env.pca, scaling = 1, mar.percent = 0.08)
cleanplot.pca(env.pca, scaling = 2, mar.percent = 0.04)
# Clustering the objects using the environmental data: Euclidean
# distance after standardizing the variables, followed by Ward
# clustering
env.w <- hclust(dist(scale(EnviroVardf)), "ward.D")
# Cut the dendrogram to yield 4 groups
gr <- cutree(env.w, k = 4)
grl <- levels(factor(gr))
# Extract the site scores, scaling 1
sit.sc1 <- scores(env.pca, display = "wa", scaling = 1)
# Plot the sites with cluster symbols and colours (scaling 1)
dev.new(title = "Ordination and clustering", noRStudioGD = TRUE)
p <- plot(
env.pca,
display = "wa",
scaling = 1,
type = "n",
main = "PCA correlation + clusters"
)
abline(v = 0, lty = "dotted")
abline(h = 0, lty = "dotted")
for (i in 1:length(grl)) {
points(sit.sc1[gr == i, ],
pch = (14 + i),
cex = 2,
col = i + 1)
}
text(sit.sc1, row.names(EnviroVardf), cex = 0.7, pos = 3)
# Add the dendrogram
ordicluster(p, env.w, col = "dark grey")
# Add legend interactively
#legend(
# locator(1),
#paste("Cluster", c(1:length(grl))),
#pch = 14 + c(1:length(grl)),
#col = 1 + c(1:length(grl)),
#pt.cex = 2
#)
inf.pca <- rda(InfectionValuesdf, scale = TRUE)
inf.pca
## Call: rda(X = InfectionValuesdf, scale = TRUE)
##
## Inertia Rank
## Total 7
## Unconstrained 7 7
## Inertia is correlations
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 4.102 1.772 0.719 0.242 0.108 0.046 0.012
summary(inf.pca) # Default scaling 2
##
## Call:
## rda(X = InfectionValuesdf, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 7 1
## Unconstrained 7 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 4.102 1.7716 0.7186 0.24210 0.10771 0.045946 0.011999
## Proportion Explained 0.586 0.2531 0.1027 0.03459 0.01539 0.006564 0.001714
## Cumulative Proportion 0.586 0.8391 0.9417 0.97633 0.99172 0.998286 1.000000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 2.645751
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Prev.Pos.Dogs.T1 -0.7473 -0.5403 -0.19231 -0.305370 -0.120160 -0.06807
## Prev.Pos.Dogs.T3 0.1039 -0.7147 0.68901 0.006342 0.050572 -0.03354
## Inc.MST2 -0.7696 0.4765 0.34353 0.090395 -0.232186 0.01499
## Mean.Max.Saliva -0.9045 -0.3394 -0.18443 0.142592 0.066692 -0.03882
## Mean.Saliva -0.8038 -0.5185 -0.13268 0.237179 -0.006209 0.08940
## Prev -0.8939 0.2827 0.17354 -0.243349 0.133328 0.11767
## Saliva.Prevlence -0.8229 0.5273 0.08416 0.069509 0.120598 -0.12891
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## BCVN -0.60234 -0.4370 2.07585 -0.79678 0.1069 -0.64581
## BMCA -0.96400 -0.6420 -1.54056 -0.35689 0.2825 -1.34892
## BVSV 0.59921 0.6000 0.02543 0.09193 1.2648 -0.69018
## MGROS 2.14133 -0.9562 -0.10429 0.15666 -0.3061 -0.09237
## MQBA -0.09347 1.7379 0.16225 1.33468 -0.7056 -0.44448
## PABE -0.84902 -1.2241 0.11205 1.56133 0.1138 1.10841
## PD -0.17551 0.1611 -0.35682 -1.03888 -1.8645 0.50026
## PF -0.05621 0.7602 -0.37391 -0.95204 1.1083 1.61309
summary(inf.pca, scaling = 1)
##
## Call:
## rda(X = InfectionValuesdf, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 7 1
## Unconstrained 7 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 4.102 1.7716 0.7186 0.24210 0.10771 0.045946 0.011999
## Proportion Explained 0.586 0.2531 0.1027 0.03459 0.01539 0.006564 0.001714
## Cumulative Proportion 0.586 0.8391 0.9417 0.97633 0.99172 0.998286 1.000000
##
## Scaling 1 for species and site scores
## * Sites are scaled proportional to eigenvalues
## * Species are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 2.645751
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Prev.Pos.Dogs.T1 -0.9762 -1.0739 -0.6002 -1.6420 -0.96867 -0.8401
## Prev.Pos.Dogs.T3 0.1357 -1.4206 2.1505 0.0341 0.40768 -0.4140
## Inc.MST2 -1.0054 0.9472 1.0722 0.4861 -1.87176 0.1850
## Mean.Max.Saliva -1.1816 -0.6746 -0.5756 0.7667 0.53763 -0.4792
## Mean.Saliva -1.0501 -1.0307 -0.4141 1.2753 -0.05005 1.1035
## Prev -1.1677 0.5619 0.5417 -1.3085 1.07482 1.4524
## Saliva.Prevlence -1.0749 1.0480 0.2627 0.3738 0.97220 -1.5912
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## BCVN -0.46110 -0.21984 0.665085 -0.14818 0.01326 -0.052321
## BMCA -0.73795 -0.32298 -0.493582 -0.06637 0.03504 -0.109286
## BVSV 0.45871 0.30186 0.008148 0.01710 0.15689 -0.055917
## MGROS 1.63921 -0.48103 -0.033414 0.02913 -0.03797 -0.007483
## MQBA -0.07155 0.87432 0.051985 0.24821 -0.08753 -0.036010
## PABE -0.64993 -0.61581 0.035899 0.29036 0.01411 0.089800
## PD -0.13435 0.08105 -0.114323 -0.19320 -0.23128 0.040530
## PF -0.04303 0.38243 -0.119798 -0.17705 0.13748 0.130687
# Eigenvalues
infev <- inf.pca$CA$eig
# Scree plot and broken stick model
dev.new(title = "Scree plot of PCA eigenvalues", noRStudioGD = TRUE)
screeplot(inf.pca, bstick = TRUE, npcs = length(inf.pca$CA$eig))
## Cleanplot PCA biplots: scaling 1 and scaling 2
dev.new(width = 12,
height = 6,
title = "PCA biplots - Infection variables - cleanplot.pca",
noRStudioGD = TRUE
)
par(mfrow = c(1, 2))
cleanplot.pca(inf.pca, scaling = 1, mar.percent = 0.08)
cleanplot.pca(inf.pca, scaling = 2, mar.percent = 0.04)
# Clustering the objects using the environmental data: Euclidean
# distance after standardizing the variables, followed by Ward
# clustering
inf.w <- hclust(dist(scale(InfectionValuesdf)), "ward.D")
# Cut the dendrogram to yield 4 groups
gr <- cutree(inf.w, k = 4)
grl <- levels(factor(gr))
# Extract the site scores, scaling 1
sit.sc1 <- scores(inf.pca, display = "wa", scaling = 1)
# Plot the sites with cluster symbols and colours (scaling 1)
dev.new(title = "Ordination and clustering", noRStudioGD = TRUE)
p <- plot(
inf.pca,
display = "wa",
scaling = 1,
type = "n",
main = "PCA correlation + clusters"
)
abline(v = 0, lty = "dotted")
abline(h = 0, lty = "dotted")
for (i in 1:length(grl)) {
points(sit.sc1[gr == i, ],
pch = (14 + i),
cex = 2,
col = i + 1)
}
text(sit.sc1, row.names(InfectionValuesdf), cex = 0.7, pos = 3)
# Add the dendrogram
ordicluster(p, inf.w, col = "dark grey")
# Add legend interactively
#legend(
# locator(1),
#paste("Cluster", c(1:length(grl))),
#pch = 14 + c(1:length(grl)),
#col = 1 + c(1:length(grl)),
#pt.cex = 2
#)
For the imputed data frame missing values were filled in using imputePCA
inf.imp.pca <- rda(InfectionValuesdf.imp, scale = TRUE)
inf.imp.pca
## Call: rda(X = InfectionValuesdf.imp, scale = TRUE)
##
## Inertia Rank
## Total 9
## Unconstrained 9 9
## Inertia is correlations
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
## 5.129 2.545 0.565 0.499 0.137 0.055 0.043 0.020 0.006
summary(inf.imp.pca) # Default scaling 2
##
## Call:
## rda(X = InfectionValuesdf.imp, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 9 1
## Unconstrained 9 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 5.1290 2.5450 0.5652 0.4995 0.13721 0.054739 0.04266
## Proportion Explained 0.5699 0.2828 0.0628 0.0555 0.01525 0.006082 0.00474
## Cumulative Proportion 0.5699 0.8527 0.9155 0.9710 0.98621 0.992292 0.99703
## PC8 PC9
## Eigenvalue 0.020306 0.0064063
## Proportion Explained 0.002256 0.0007118
## Cumulative Proportion 0.999288 1.0000000
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 3.567621
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Prev.Pos.Dogs.T1 0.9714 -0.4741 0.04915 0.38736 -0.28100 0.119622
## Prev.Pos.Dogs.T3 0.6219 -0.7193 -0.40614 -0.57865 -0.05943 0.047958
## Inc.MST2 0.7879 0.6801 0.37467 -0.40777 -0.05376 0.074064
## Mean.Max.Saliva 1.1495 -0.1623 0.08168 0.12986 0.18503 0.009132
## Mean.Saliva 1.1077 -0.3478 0.05792 0.09441 0.20895 0.033567
## Prev 1.0237 0.5439 0.01351 -0.02806 -0.16343 -0.189072
## Saliva.Prevlence 0.8161 0.8454 0.07577 -0.01456 0.05341 0.049907
## HumanRevGroupModLambda 0.4273 0.8488 -0.68889 0.16717 0.02690 0.031906
## DogRevGroupModLambda 0.9261 -0.7185 -0.03210 0.02176 0.02201 -0.121864
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## ABCHAC -1.03636 0.26612 -0.044951 0.22271 -0.286679 0.76559
## All -0.08213 0.01719 0.006013 0.03532 0.006155 -0.02621
## BCVN 0.79624 0.26392 -0.509215 -1.68743 -1.528627 -0.40687
## BMCA 0.83505 0.14287 0.413812 1.52915 1.032573 0.15980
## BVSV -0.29580 1.06722 -1.980845 0.18186 0.234192 1.30109
## CD 0.83892 -1.17097 -0.270483 -0.40191 -0.116166 0.24913
## CL 0.42336 0.79845 -1.742644 0.34888 0.426673 -0.18651
## CM -0.18274 -0.60597 -0.012914 0.24330 0.583622 -1.64942
## FDR -1.87605 0.53767 0.935772 0.12954 0.264089 -0.63524
## JUCACH 0.01415 -0.60336 0.131791 1.02751 -0.688226 0.32073
## MARU -0.62213 0.12197 -0.082952 1.01997 -0.317605 -0.02302
## MGROS -1.42854 -1.57322 -0.007298 -0.89742 -0.063719 0.70759
## MQBA -0.10604 1.41825 1.508961 -0.78869 0.436117 1.16889
## PABE 0.94165 -0.46838 0.442739 -0.28895 2.007664 -0.04833
## PD 0.16065 0.32151 0.706839 0.12406 -1.134836 0.14348
## PF 0.06631 0.90421 0.004924 0.32805 -0.756406 -2.06608
## SM 0.30948 -1.60428 -0.111311 0.76248 -0.699273 0.36676
## VC -0.26307 -0.20772 -0.465263 -1.65294 1.091834 -0.63182
## VUPD 1.50705 0.37452 1.077024 -0.23548 -0.491383 0.49045
summary(inf.imp.pca, scaling = 1)
##
## Call:
## rda(X = InfectionValuesdf.imp, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 9 1
## Unconstrained 9 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 5.1290 2.5450 0.5652 0.4995 0.13721 0.054739 0.04266
## Proportion Explained 0.5699 0.2828 0.0628 0.0555 0.01525 0.006082 0.00474
## Cumulative Proportion 0.5699 0.8527 0.9155 0.9710 0.98621 0.992292 0.99703
## PC8 PC9
## Eigenvalue 0.020306 0.0064063
## Proportion Explained 0.002256 0.0007118
## Cumulative Proportion 0.999288 1.0000000
##
## Scaling 1 for species and site scores
## * Sites are scaled proportional to eigenvalues
## * Species are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 3.567621
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## Prev.Pos.Dogs.T1 1.2868 -0.8916 0.19613 1.64430 -2.2758 1.5339
## Prev.Pos.Dogs.T3 0.8238 -1.3526 -1.62068 -2.45630 -0.4813 0.6149
## Inc.MST2 1.0436 1.2789 1.49511 -1.73095 -0.4354 0.9497
## Mean.Max.Saliva 1.5226 -0.3052 0.32593 0.55126 1.4985 0.1171
## Mean.Saliva 1.4673 -0.6541 0.23111 0.40077 1.6922 0.4304
## Prev 1.3561 1.0228 0.05392 -0.11910 -1.3236 -2.4244
## Saliva.Prevlence 1.0810 1.5898 0.30236 -0.06182 0.4325 0.6399
## HumanRevGroupModLambda 0.5660 1.5961 -2.74902 0.70962 0.2179 0.4091
## DogRevGroupModLambda 1.2268 -1.3512 -0.12811 0.09236 0.1782 -1.5626
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## ABCHAC -0.78236 0.141514 -0.011265 0.052466 -0.035398 0.059706
## All -0.06200 0.009141 0.001507 0.008321 0.000760 -0.002044
## BCVN 0.60109 0.140344 -0.127607 -0.397522 -0.188747 -0.031731
## BMCA 0.63039 0.075974 0.103700 0.360234 0.127496 0.012462
## BVSV -0.22330 0.567515 -0.496392 0.042842 0.028917 0.101469
## CD 0.63331 -0.622684 -0.067782 -0.094681 -0.014344 0.019429
## CL 0.31960 0.424590 -0.436700 0.082189 0.052683 -0.014545
## CM -0.13795 -0.322234 -0.003236 0.057316 0.072062 -0.128635
## FDR -1.41625 0.285918 0.234501 0.030516 0.032608 -0.049541
## JUCACH 0.01068 -0.320848 0.033026 0.242058 -0.084978 0.025013
## MARU -0.46966 0.064858 -0.020788 0.240283 -0.039216 -0.001796
## MGROS -1.07842 -0.836588 -0.001829 -0.211413 -0.007868 0.055184
## MQBA -0.08005 0.754180 0.378140 -0.185799 0.053849 0.091159
## PABE 0.71086 -0.249069 0.110949 -0.068071 0.247895 -0.003769
## PD 0.12128 0.170967 0.177131 0.029225 -0.140123 0.011190
## PF 0.05006 0.480830 0.001234 0.077281 -0.093397 -0.161129
## SM 0.23363 -0.853104 -0.027894 0.179623 -0.086342 0.028603
## VC -0.19860 -0.110461 -0.116593 -0.389396 0.134814 -0.049274
## VUPD 1.13769 0.199158 0.269898 -0.055473 -0.060673 0.038249
# Eigenvalues
infev <- inf.imp.pca$CA$eig
# Scree plot and broken stick model
dev.new(title = "Scree plot of PCA eigenvalues", noRStudioGD = TRUE)
screeplot(inf.imp.pca, bstick = TRUE, npcs = length(inf.imp.pca$CA$eig))
## Cleanplot PCA biplots: scaling 1 and scaling 2
dev.new(width = 12,
height = 6,
title = "PCA biplots - Infection variables Imputed- cleanplot.pca",
noRStudioGD = TRUE
)
par(mfrow = c(1, 2))
cleanplot.pca(inf.imp.pca, scaling = 1, mar.percent = 0.08)
cleanplot.pca(inf.imp.pca, scaling = 2, mar.percent = 0.04)
# Clustering the objects using the environmental data: Euclidean
# distance after standardizing the variables, followed by Ward
# clustering
inf.imp.w <- hclust(dist(scale(InfectionValuesdf.imp)), "ward.D")
# Cut the dendrogram to yield 4 groups
gr <- cutree(inf.imp.w, k = 4)
grl <- levels(factor(gr))
# Extract the site scores, scaling 1
sit.sc1 <- scores(inf.imp.pca, display = "wa", scaling = 1)
# Plot the sites with cluster symbols and colours (scaling 1)
dev.new(title = "Ordination and clustering", noRStudioGD = TRUE)
p <- plot(
inf.imp.pca,
display = "wa",
scaling = 1,
type = "n",
main = "PCA correlation + clusters"
)
abline(v = 0, lty = "dotted")
abline(h = 0, lty = "dotted")
for (i in 1:length(grl)) {
points(sit.sc1[gr == i, ],
pch = (14 + i),
cex = 2,
col = i + 1)
}
text(sit.sc1, row.names(InfectionValuesdf.imp), cex = 0.7, pos = 3)
# Add the dendrogram
ordicluster(p, inf.imp.w, col = "dark grey")
# Add legend interactively
#legend(
# locator(1),
#paste("Cluster", c(1:length(grl))),
#pch = 14 + c(1:length(grl)),
#col = 1 + c(1:length(grl)),
#pt.cex = 2
#)
EnviroVardf <-EnviroVardf[c(row.names(InfectionValuesdf)),]
RDA <- rda(InfectionValuesdf, EnviroVardf)
coef(RDA)
## RDA1 RDA2
## Annual.Precipitation 1.109655e+00 1.863965e-01
## Wettest -5.232265e+00 -8.068760e-01
## Driest -1.344334e+00 -4.686606e-01
## Seasonal 1.120137e+01 2.407766e+00
## Tmin 2.554251e+00 -1.861547e+00
## Tmax 4.089716e+00 7.955711e-01
## Precip NA NA
## NDVIMean -4.590814e-09 -1.445956e-07
## NDVIMedian NA NA
## NDVIMax NA NA
## NDVIMin NA NA
## LSTMean NA NA
## LSTMedian NA NA
## LSTMax NA NA
## LSTMin NA NA
## EvapoMean NA NA
## EvapoMedian NA NA
## EvapoMax NA NA
## EvapoMin NA NA
# Unadjusted R^2 retrieved from the rda object
R2 <- RsquareAdj(RDA)$r.squared
# Adjusted R^2 retrieved from the rda object
R2adj <- RsquareAdj(RDA)$adj.r.squared
# Scaling 1
plot(RDA,
scaling = 1,
display = c("sp", "lc", "cn"),
main = "Triplot RDA ~ env3 - scaling 1 - lc scores"
)