Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
## Warning: package 'tidyverse' was built under R version 4.0.5
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.3
## Warning: package 'forcats' was built under R version 4.0.3
## Warning: package 'readxl' was built under R version 4.0.3
## Warning: package 'devtools' was built under R version 4.0.5
## Warning: package 'usethis' was built under R version 4.0.5
## Warning: package 'plyr' was built under R version 4.0.5
## Warning: package 'scales' was built under R version 4.0.3
## Warning: package 'broom' was built under R version 4.0.5
## Warning: package 'cowplot' was built under R version 4.0.3
#CODE TO MAKE COLUMN 1st THE ROW NAMES
rownames(jan2019pca)<-jan2019pca$names
## Warning: Setting row names on a tibble is deprecated.
jan2019pca
## # A tibble: 37 x 9
## names Watershed Chloride Calcium Hardness Magnesium Potassium Sodium Sulfate
## * <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ocono~ Rock 63.6 50.7 262 33 2.42 28.9 17
## 2 Rubic~ Rock 135 66.5 317 36.8 3.13 71.8 28.7
## 3 Ocono~ Rock 45.8 74 335 36.5 2.56 21.2 21.5
## 4 Bark ~ Rock 116 68.2 298 31 2.42 66 18.6
## 5 Bark ~ Rock 104 56.5 283 34.4 2.43 52.5 20.4
## 6 Mukwo~ Fox 49.1 60.5 286 32.8 1.94 22.8 20.8
## 7 Pebbl~ Fox 75.6 71.6 311 32.2 2.45 37.4 20.5
## 8 Fox R~ Fox 254 69.8 319 35.2 2.98 145 39.3
## 9 Cedar~ Milwaukee 64.1 51.1 235 26.1 4.19 32.5 14.8
## 10 Ulao ~ Milwaukee 83.1 50.1 229 25.2 4.29 46.3 16.2
## # ... with 27 more rows
#ONLY USES NUMERIC DATA COLUMNS FOR PCA
pca_fit <- jan2019pca %>%
select(where(is.numeric)) %>% # retain only numeric columns
prcomp(scale. = TRUE,center=TRUE) # do PCA on scaled data
object<-summary(pca_fit)#Screeplot for data
object
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0980 1.2395 0.79098 0.53779 0.38197 0.03425 0.003156
## Proportion of Variance 0.6288 0.2195 0.08938 0.04132 0.02084 0.00017 0.000000
## Cumulative Proportion 0.6288 0.8483 0.93767 0.97899 0.99983 1.00000 1.000000
screeplot(object,npcs=7,type = c("bar")
,main = deparse1(substitute()))
#THIS PLOT SUCKS
#try ggplot for scree create new object
ggobj<-data.frame(PC=paste0("PC",1:7),var_explained=(object$sdev)^2/sum((object$sdev)^2))
ggobj
## PC var_explained
## 1 PC1 6.288287e-01
## 2 PC2 2.194632e-01
## 3 PC3 8.937862e-02
## 4 PC4 4.131719e-02
## 5 PC5 2.084327e-02
## 6 PC6 1.676140e-04
## 7 PC7 1.422770e-06
ggobj %>%
ggplot(aes(x=PC,y=var_explained, group=1))+
geom_point(size=4)+
geom_col()+
labs(title="Scree plot: PCA on scaled data")
ggobj %>%
ggplot(aes(x=PC,y=var_explained, group=1))+
geom_point(size=4)+
geom_line()+
labs(title="Scree plot: PCA on scaled data")
pca_fit$rotation # table with data braking out variation for each pc
## PC1 PC2 PC3 PC4 PC5
## Chloride 0.3634670 0.4562330 0.39475189 -0.04639329 0.009698938
## Calcium 0.4315411 -0.1565191 -0.31311886 -0.36651770 0.539272976
## Hardness 0.4462788 -0.2224627 -0.19801112 -0.27946600 0.033385733
## Magnesium 0.4164934 -0.3046157 0.01605694 -0.10543972 -0.787604348
## Potassium -0.1909283 0.6065377 -0.58296009 -0.41294434 -0.291903843
## Sodium 0.3603507 0.4656795 0.38487929 -0.07584405 0.048505383
## Sulfate 0.3777913 0.2008170 -0.46763956 0.77331644 0.009733739
## PC6 PC7
## Chloride -0.708281220 -0.0023460536
## Calcium -0.024195214 0.5152617529
## Hardness -0.003242948 -0.7955569023
## Magnesium 0.021530155 0.3187208998
## Potassium -0.009141082 0.0012336093
## Sodium 0.705017040 0.0019345506
## Sulfate 0.012068744 0.0008723285
ggbiplot(pca_fit)#plot no names
ggbiplot(pca_fit,labels = rownames(jan2019pca))#plot with site names
#ggbiplot(pca_fit,ellipse = TRUE, labels=rownames(jan2019pca)) #plot with ellipses....????
ggbiplot(pca_fit,group=jan2019pca$Watershed,ellipse = TRUE,)+
scale_color_discrete(name="WaterShed")+
labs(title = "PCA:Ion Variables January 2019")+
theme_cowplot()