========
========
filepath = "/Users/emvanmetre/Desktop/Arctic/X-Y_Plots/data/processed/Combined/"
years = c("2022", "2023")
seasons = c("Spring", "Summer","Fall")
sites = c("TNHA", "BEO")
# put season values for the season that has the start of the data
date_start = "2022-06-01" # data starts in June 2022 (YEAR-MO-DY) where day is always 01
# put season values for the season that has the last of the data
date_end = "2023-11-01" # data ends after November of 2023 (will get data up UNTIL date_end not after)
scree = F # scree plot
eigen = T # eigenvectors and eigenvalues
spring_months = c("March", "April", "May")
summer_months = c("June","July","August")
fall_months = c("September", "October", "November")
winter_months = c("December", "January", "February")
spring_dates = data.frame(months=spring_months, start=c("-03-01","-04-01","-05-01"), end=c("-04-01","-05-01","-06-01"))
summer_dates = data.frame(months=summer_months, start=c("-06-01","-07-01","-08-01"), end=c("-07-01","-08-01","-09-01"))
fall_dates = data.frame(months=fall_months, start=c("-09-01","-10-01","-11-01"), end=c("-10-01","-11-01","-12-01"))
winter_dates = data.frame(months=winter_months, start=c("-12-01","-01-01","-02-01"), end=c("-01-01","-02-01","-03-01"))
all_dates = data.frame(matrix(nrow = 0, ncol = 4))
for(yur in years){
# spring
if("Spring" %in% seasons){
for(i in c(1:3)){
curdate = as.Date(paste0(yur, spring_dates[i, 2]))
if(curdate >= date_start && curdate < date_end){
all_dates <- rbind(all_dates, c(spring_dates[i,1], paste0(yur, spring_dates[i, 2]), paste0(yur, spring_dates[i, 3]),"Spring"))
}
}
}
if("Summer" %in% seasons){
for(i in c(1:3)){
curdate = as.Date(paste0(yur, summer_dates[i, 2]))
if(curdate >= date_start && curdate < date_end){
all_dates <- rbind(all_dates, c(summer_dates[i,1], paste0(yur, summer_dates[i, 2]), paste0(yur, summer_dates[i, 3]),"Summer"))
}
}
}
if("Fall" %in% seasons){
for(i in c(1:3)){
curdate = as.Date(paste0(yur, fall_dates[i, 2]))
if(curdate >= date_start && curdate < date_end){
all_dates <- rbind(all_dates, c(fall_dates[i,1], paste0(yur, fall_dates[i, 2]), paste0(yur, fall_dates[i, 3]),"Fall"))
}
}
}
if("Winter" %in% seasons){
for(i in c(1:3)){
curdate = as.Date(paste0(yur, winter_dates[i, 2]))
if(curdate >= date_start && curdate < date_end){
all_dates <- rbind(all_dates, c(winter_dates[i,1], paste0(yur, winter_dates[i, 2]), paste0(yur, winter_dates[i, 3]),"Winter"))
}
}
}
}
colnames(all_dates) = c("months","start","end","szn")
library(dplyr)
library(lubridate)
library(tidyverse)
=========================
=========================
Temporal Range: Season Vertical Spatial Range: 30-45 cm Horizontal Spatial Range: stations across site (TNHA, SSMH, BEO) –> Average Total Site –> North vs South (except for BEO)
pick_site <- function(cursite){
gtfile = paste0("GroundTemperature_",szn,yr,"_DAILY.csv")
airfile = paste0("AirTemperature_",szn,yr,"_DAILY.csv")
vwcfile = paste0("VWC_",szn,yr,"_DAILY.csv")
solfile = paste0("Solar_",szn,yr,"_DAILY.csv")
windspeed = paste0("WindSpeed_",szn,yr,"_DAILY.csv")
winddir = paste0("WindDirection_",szn,yr,"_DAILY.csv")
grndtmp <<- read.csv(paste0(filepath, gtfile))
airtmp <<- read.csv(paste0(filepath, airfile))
vwc <<- read.csv(paste0(filepath, vwcfile))
solar <<- read.csv(paste0(filepath, solfile))
wind <<- read.csv(paste0(filepath, windspeed))
pca_ground = grndtmp %>% filter(site == cursite) %>% filter(depth == "30cm")
pca_air = airtmp %>% filter(site == cursite)
pca_wind = wind %>% filter(site == cursite)
pca_solar = solar %>% filter(site == cursite)
pca_vwc = vwc %>% filter(site==cursite) %>% filter(depth == "30-45cm")
big_df <<- full_join(pca_ground, pca_air, by=c("Date", "site", "station", "fullname")) %>%
select(Date, site, station, fullname, avg.x, avg.y) %>% rename("groundtemp"="avg.x", "airtemp"="avg.y")
big_df <<- full_join(big_df, pca_wind, by=c("Date", "site", "station", "fullname")) %>%
select(Date, site, station, fullname, groundtemp, airtemp, avg) %>% rename("windspeed" = avg)
big_df <<- full_join(big_df, pca_solar, by=c("Date", "site", "station", "fullname")) %>%
select(Date, site, station, fullname, groundtemp, airtemp, windspeed, avg) %>% rename("solar" = avg)
big_df <<- full_join(big_df, pca_vwc, by=c("Date", "site", "station", "fullname")) %>%
select(Date, site, station, fullname, groundtemp, airtemp, windspeed, solar, avg) %>% rename("vwc" = avg)
if(szn == "Winter"){
big_df <<- big_df %>% select(-solar)
}
return(big_df)
}
pick_dates <- function(datemin, datemax, big_df){
pca_df <<- big_df %>% filter(Date >= datemin) %>% filter(Date < datemax)
# get rid of NAs
pca_df <<- na.omit(pca_df)
pca_df <<- unique(pca_df)
return(pca_df)
}
calc_pca <- function(pca_df){
pca <<- prcomp(pca_df[,5:ncol(pca_df)], center=TRUE, scale.=TRUE)
#take out variables
sd <- pca$sdev
loads <<- pca$rotation
rownames(loads) <<- colnames(pca_df[5:ncol(pca_df)])
scores <<- pca$x
var <- sd^2
varPercent <- var/sum(var) * 100
return(list("pca"=pca, "loads"=loads))
}
make_scree <- function(pca){
sd <- pca$sdev
var <- sd^2
varPercent <- var/sum(var) * 100
barplot(varPercent, xlab="PC", ylab="Percent Variance", names.arg=1:length(varPercent),
las=1, ylim=c(0, max(varPercent)), col="gray")
abline(h=1/ncol(pca_df[5:ncol(pca_df)])*100, col="red")
}
make_eigen <- function(pca){
eigenvectors <- pca$rotation
print("Eigenvectors (Loadings):")
print(eigenvectors)
print("Loadings Cutoff:")
sqrt(1/ncol(pca_df[5:ncol(pca_df)])) # cutoff for "important" loadings
# Access the eigenvalues (variances of the principal components)
eigenvalues <- (pca$sdev)^2
print("Eigenvalues:")
print(eigenvalues)
}
===============
===============
make_pca <- function(pca_df, szn, yr, site){
if(site == "TNHA"){
SOUTH <<- pca_df$fullname == "TNHA-SA"
NORTH <<- pca_df$fullname == "TNHA-SC"
s <- "TNHA-SA"
n <- "TNHA-SC"
} else{
if(site == "SSMH"){
SOUTH <<- pca_df$fullname == "SSMH-SA"
NORTH <<- pca_df$fullname == "SSMH-SB"
s <- "SSMH-SA"
n <- "SSMH-SB"
} else {
SOUTH <<- pca_df$fullname == "BEO-BASE"
n <- "BEO"
}
}
scaling <- 2
textNudge <- 1.1
limNudge <- 1.3
xlimit <- seq(floor(min(scores[,1])*limNudge),ceiling(max(scores[,1])*limNudge), 1)
ylimit <- seq(floor(min(scores[,2])*limNudge),ceiling(max(scores[,2])*limNudge), 1)
plot(scores[, 1], scores[, 2], xlab="PCA 1", ylab="PCA 2", type="n", asp=1,
las=1, xaxt='n', yaxt='n')
axis(side = 1, at=xlimit)
axis(side = 2, at=ylimit)
title(paste0(szn, " ", yr," Principal Component Analysis: ",site," North v. South\n",format(as.Date(min(pca_df$Date)), format="%B %d %Y")," - ",format(as.Date(max(pca_df$Date)), format="%B %d %Y")), adj=0.5)
points(scores[SOUTH, 1], scores[SOUTH, 2], pch=16, cex=0.7, col="mediumturquoise")
if(site != "BEO"){
points(scores[NORTH, 1], scores[NORTH, 2], pch=16, cex=0.7, col="salmon")
legend(x = "topright", # Position
legend = c(paste0(s, " (south)"), paste0(n, " (north)")), # Legend texts
col = c("mediumturquoise","salmon"),
pch = 19) #colors
} else{
legend(x = "topright", # Position
legend = "BEO", # Legend texts
col = "mediumturquoise",
pch = 19)
}
arrows(0, 0, loads[, 1]* scaling, loads[, 2]* scaling, length=0.1, angle=20, col="darkred")
text(loads[1, 1]*scaling*textNudge, loads[1, 2]*scaling*textNudge, rownames(loads)[1], col="darkred", cex=0.7) # ground label
text(loads[2, 1]*scaling*textNudge, loads[2, 2]*scaling*textNudge+0.2, rownames(loads)[2], col="darkred", cex=0.7) # air label
if(nrow(loads) > 2){
text(loads[3, 1]*scaling*textNudge, loads[3, 2]*scaling*textNudge, rownames(loads)[3], col="darkred", cex=0.7) # wind label
if(nrow(loads)>3){
text(loads[4, 1]*scaling*textNudge-0.2, loads[4, 2]*scaling*textNudge, rownames(loads)[4], col="darkred", cex=0.7) # solar label
if(nrow(loads)>4){
text(loads[5, 1]*scaling*textNudge, loads[5, 2]*scaling*textNudge, rownames(loads)[5], col="darkred", cex=0.7) # vwc label
}
}
}
#text(-3, 1]*scaling*textNudge, 1, "TNHA-SA \n(south)", col="mediumturquoise")
#text(1, 1, "TNHA-SC \n(north)", col="salmon")
}
for(i in c(1:nrow(all_dates))){
month <- all_dates$months[i]
startdate <- all_dates$start[i]
enddate <- all_dates$end[i]
szn <<- all_dates$szn[i]
yr <<- substr(all_dates$start[i], 1, 4)
for(site in sites){
big_df <- pick_site(site)
pca_df <- pick_dates(startdate, enddate, big_df)
if(nrow(pca_df > 0)){
p <- calc_pca(pca_df)
pca <- p$pca
loads <- p$loads
if(scree == T){
make_scree(pca)
}
if(eigen == T){
make_eigen(pca)
}
make_pca(pca_df, szn, yr, site)
}
}
}
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.5476145 0.4045920 -0.03918486 -0.0580378 -0.72905408
## airtemp 0.3724533 -0.4286214 0.66305490 -0.4856979 0.04492273
## windspeed 0.3248552 -0.5207118 -0.73361839 -0.2912401 0.01765196
## solar 0.4449430 -0.3442438 0.11958841 0.8148957 0.07187135
## vwc 0.5078305 0.5127810 -0.07953252 -0.1088734 0.67895877
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.57644726 1.07455295 0.72798280 0.52963666 0.09138032
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.4860508 -0.4226322 0.07325217 -0.7465687 0.14968635
## airtemp -0.5022990 -0.1458409 0.41191297 0.5508986 0.50325392
## windspeed 0.3542183 0.1812267 0.88077177 -0.2541337 -0.03665269
## solar -0.5394409 0.1417582 0.19212997 0.1300206 -0.79692465
## vwc 0.3081936 -0.8643922 0.11085571 0.2401179 -0.29647458
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6295396 0.9295922 0.7937236 0.3626124 0.2845322
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.3592753 -0.63344711 -0.25652697 0.3370505 -0.5387549
## airtemp 0.4467041 0.08230121 0.78786113 0.4078843 0.0811617
## windspeed -0.1582638 -0.76345645 0.28783441 -0.3192084 0.4553506
## solar 0.5757197 0.08961316 0.08803741 -0.7698935 -0.2450098
## vwc -0.5611382 -0.03278706 0.47209059 -0.1593657 -0.6601378
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1504093 1.3267213 0.7880669 0.4249392 0.3098633
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.5252442 -0.03932148 0.21139271 0.47524974 0.6723267
## airtemp -0.4367753 -0.48698738 0.57352415 -0.09309700 -0.4842248
## windspeed 0.3767501 0.48249405 0.78169155 -0.04663773 0.1097366
## solar -0.4593501 0.28559019 -0.03647871 -0.80565266 0.2388072
## vwc 0.4247385 -0.66853186 0.11836723 -0.33796352 0.4944009
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6493396 0.8693144 0.6830215 0.5466514 0.2516731
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.5372070 -0.2682398 0.2704804 0.00573965 0.7525048
## airtemp 0.4802015 -0.1533955 -0.2337970 -0.77241157 -0.3075641
## windspeed -0.2732646 -0.7710632 -0.5532303 0.10359450 0.1182893
## solar 0.4083375 0.4344661 -0.7185460 0.33766224 0.1190609
## vwc -0.4892873 0.3481637 -0.2231750 -0.52782555 0.5576496
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7981110 1.0390136 0.5430929 0.4820706 0.1377119
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.5404616 0.2176303 -0.02372722 0.7658419 -0.2710379
## airtemp -0.5649587 0.3237355 -0.01395118 -0.2358723 0.7212397
## windspeed 0.2916657 0.2458451 -0.89683997 0.1626742 0.1539691
## solar -0.5284697 -0.1760414 -0.40162623 -0.5164223 -0.5115991
## vwc 0.1561213 0.8697137 0.18334893 -0.2543533 -0.3477236
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3391180 1.1371182 0.8903281 0.4207271 0.2127086
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.5321448 0.002237055 0.03624474 -0.8411960 -0.08883964
## airtemp -0.5478684 0.143017918 -0.14729444 0.2594716 0.76835211
## windspeed 0.1802992 0.778330875 0.57880984 -0.1006336 0.12862853
## solar -0.4248978 -0.372082405 0.73321750 0.3206699 -0.20144342
## vwc -0.4512385 0.485073813 -0.32305098 0.3348245 -0.58704156
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.8938153 1.1994165 0.5210513 0.2526733 0.1330435
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.4662444 0.3879798 -0.20869428 -0.7258858 -0.2482424
## airtemp 0.7429921 -0.0737595 0.04527871 0.2094164 0.6297753
## windspeed 0.1306295 0.5528159 -0.49148052 0.6083399 -0.2563200
## solar 0.4072216 -0.6594882 -0.13310129 0.1520747 -0.5986679
## vwc 0.2183699 0.3217159 0.83374306 0.1898166 -0.3430097
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.4410752 1.3323563 1.0319179 0.7900802 0.4045703
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.56672861 0.05840738 -0.25411763 -0.23517491 0.74533500
## airtemp -0.48783723 -0.34589749 -0.05059914 0.79212154 -0.11114382
## windspeed 0.02783611 -0.83519168 0.43800241 -0.29974728 0.14136994
## solar -0.36011428 0.41798772 0.83374092 0.01137321 -0.01872672
## vwc -0.55711432 -0.06844428 -0.21422867 -0.47671631 -0.64170604
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.88262567 1.23324777 0.59077857 0.26221529 0.03113271
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.5645711 -0.24798478 0.13263297 -0.4221279 0.65113711
## airtemp -0.1168635 -0.69311362 -0.67221898 0.2147269 -0.08916520
## windspeed 0.2027445 -0.63099638 0.70258390 0.2556534 -0.04189767
## solar -0.5182810 0.23929893 0.08071101 0.8038413 0.14644369
## vwc -0.5982323 -0.05173645 0.17433228 -0.2533395 -0.73815233
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.62660827 1.31058436 0.70599845 0.33159551 0.02521341
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.6673474 -0.03451116 -0.21848210 0.3963036 0.5904790
## airtemp 0.4661829 -0.50753216 -0.08636693 0.1661623 -0.7000110
## windspeed 0.1248567 -0.59631953 0.54721588 -0.4639113 0.3378682
## solar 0.4964658 0.42038665 -0.19363992 -0.7241383 -0.1221641
## vwc 0.2743236 0.45705379 0.77965678 0.2752156 -0.1795552
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9234517 1.8872209 0.7107573 0.3113126 0.1672575
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.17658559 0.6965595 -0.3722293 0.44161857 0.38735104
## airtemp -0.34312984 0.5197945 -0.1826984 -0.75500432 -0.09308803
## windspeed 0.05194032 0.4400026 0.8933571 0.06722204 -0.03307792
## solar -0.63230526 0.1379988 -0.0888431 0.47642493 -0.58844931
## vwc -0.66975496 -0.1788088 0.1486158 0.05866852 0.70279944
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.8998175 1.3616674 0.9122898 0.6605397 0.1656856
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.6193954 0.03807649 0.012384938 0.1155681 0.77549348
## airtemp 0.5071285 0.39487159 0.140932827 0.5546232 -0.50934090
## windspeed 0.1292896 0.72920686 -0.009407754 -0.6707723 -0.03895668
## solar 0.3890054 -0.45400647 0.644283279 -0.4138480 -0.23702710
## vwc 0.4371831 -0.32366973 -0.751528511 -0.2404818 -0.28545094
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0659846 1.4011772 0.7138162 0.4885925 0.3304295
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.4928330 0.32647984 0.2262052 -0.73802810 -0.23382095
## airtemp 0.1813779 0.92475594 0.1208391 0.30639388 0.05872997
## windspeed -0.4159177 0.16935761 -0.8878946 0.09246864 -0.03772554
## solar 0.5187891 0.09353516 -0.3080646 -0.57275961 0.54694762
## vwc 0.5311196 0.02839957 -0.2257621 -0.15758584 -0.80081436
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.0272627 1.0188230 0.5644705 0.2162281 0.1732157
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.5596809 0.24384971 -0.07162882 -0.4802551 -0.62571472
## airtemp -0.4431436 0.34174682 0.48931076 0.6677492 -0.03897118
## windspeed -0.3476091 -0.48844324 -0.62673199 0.4687732 -0.16748078
## solar -0.1466919 -0.76489778 0.59703192 -0.1503673 -0.11981412
## vwc -0.5899410 -0.01005067 -0.07876663 -0.2847935 0.75137015
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.65004402 1.03512242 0.92354650 0.30827202 0.08301504
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.5455998 -0.1400451 -0.2846963 0.3235778 0.7049493826
## airtemp 0.4398938 -0.4932097 0.3039288 -0.6861956 -0.0007270268
## windspeed 0.3135237 0.7460425 -0.3340572 -0.4831893 -0.0075670654
## solar 0.3297383 0.4032919 0.8064977 0.2787015 0.0226955712
## vwc 0.5493540 -0.1338197 -0.2540520 0.3365807 -0.7088536449
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.007451770 1.026595633 0.752944387 0.209745305 0.003262905
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.61990805 0.1250119 -0.17192519 -0.1519721 0.739886645
## airtemp 0.56344969 -0.1540129 -0.04590494 -0.5718483 -0.574183574
## windspeed -0.07508528 0.7637636 0.55491342 -0.3210742 -0.003141396
## solar 0.43928918 0.4511804 -0.17401667 0.6731628 -0.346455370
## vwc -0.31563536 0.4168375 -0.79380335 -0.3060348 -0.053289629
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2002470 1.1410616 0.8263220 0.6550125 0.1773570
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.5196215 0.007862786 -0.2897629 0.3134311 0.7400879
## airtemp 0.4208191 0.424340028 0.6078799 0.4562867 -0.2552083
## windspeed -0.4310916 -0.048157951 -0.3509705 0.8106417 -0.1775404
## solar 0.3872501 -0.860259823 0.1092734 0.1408964 -0.2796389
## vwc 0.4659423 0.278401807 -0.6414036 -0.1287306 -0.5267069
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.15699912 0.67902325 0.55424707 0.51826593 0.09146463
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.1203035 0.5417585 0.80164198 -0.2084616 -0.07706298
## airtemp 0.6208048 -0.1199698 -0.03951987 0.1732720 -0.75407141
## windspeed -0.5908322 0.1178874 0.15672165 0.6985250 -0.35287533
## solar 0.2383409 -0.6360487 0.52459159 0.3706177 0.35507998
## vwc 0.4407260 0.5231152 -0.23674948 0.5488405 0.41813254
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9657749 1.3323030 0.8683081 0.4681599 0.3654541
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.4860841 -0.28612372 -0.3291332 0.7216444 0.22968714
## airtemp -0.4436313 0.62609827 0.1104331 0.1913845 -0.60197068
## windspeed 0.4341868 -0.02422325 0.6715099 0.5991472 -0.03149752
## solar -0.5038944 0.19317116 0.5458666 -0.2069348 0.60661606
## vwc 0.3528254 0.69873622 -0.3613585 0.2019950 0.46465038
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3837450 1.0756403 0.8440733 0.4712047 0.2253367
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp 0.53759638 -0.3087634 -0.2477784 -0.37623916 -0.64242137
## airtemp 0.08451663 0.7672672 -0.2696985 0.39068692 -0.42282912
## windspeed -0.47497597 0.3797274 0.1594208 -0.75110348 -0.20157763
## solar -0.46234334 -0.2141868 -0.8586058 -0.01283455 0.05471767
## vwc 0.51427963 0.3548202 -0.3213249 -0.37614713 0.60405526
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2489345 1.3900058 0.6166531 0.4704407 0.2739660
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.62991482 0.18869340 0.1156173 -0.1762302 -0.7233102
## airtemp -0.58030965 0.14924795 0.3534918 0.5449196 0.4680512
## windspeed -0.27606931 -0.63268572 0.3437524 -0.5762318 0.2707133
## solar 0.06698399 0.73484862 0.2425975 -0.5503432 0.3062344
## vwc -0.43098366 0.04273343 -0.8274400 -0.1925742 0.3011398
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0385921 1.4650650 0.8515792 0.4262608 0.2185030
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.4663849 -0.35008194 0.56348282 -0.06648713 -0.5813728
## airtemp -0.3694393 -0.68409863 -0.61950953 0.02780351 0.1046832
## windspeed -0.4961673 0.07871949 0.33606342 0.50152339 0.6189967
## solar 0.4727706 -0.30044036 0.04364336 0.78969969 -0.2463599
## vwc -0.4198876 0.55945556 -0.42877931 0.34591200 -0.4551890
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.4176779 0.9090763 0.2764280 0.2299386 0.1668792
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.62472947 -0.05064441 0.03294048 -0.38060246 0.67912070
## airtemp -0.06767398 -0.61402208 -0.78381623 0.04491696 -0.04485201
## windspeed 0.53527022 -0.15655100 0.02806107 -0.82944528 0.01451492
## solar -0.43383785 -0.50814591 0.45729923 -0.17837611 -0.55913490
## vwc -0.36111471 0.58111234 -0.41789597 -0.36513932 -0.47322389
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1797062 1.4459807 0.7219356 0.4933909 0.1589866
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.51267387 -0.4415367 0.1410039 -0.631192464 -0.3520295
## airtemp -0.12870685 -0.5636375 0.5918534 0.534067043 0.1738656
## windspeed 0.01814444 0.5657720 0.6782930 0.002303951 -0.4684935
## solar -0.60936631 0.1321958 -0.3793249 0.546655283 -0.4104602
## vwc -0.59071443 0.3870200 0.1608061 -0.132405286 0.6766700
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.6968187 1.5118235 0.9666274 0.5022506 0.3224797
If you want to just make one plot (for testing):
# look at all_dates and pick a row to use (set to i)
i <- 8
site <- "TNHA"
month <- all_dates$months[i]
startdate <- all_dates$start[i]
enddate <- all_dates$end[i]
szn <<- all_dates$szn[i]
yr <<- substr(all_dates$start[i], 1, 4)
big_df <- pick_site(site)
pca_df <- pick_dates(startdate, enddate, big_df)
if(nrow(pca_df > 0)){
p <- calc_pca(pca_df)
pca <- p$pca
loads <- p$loads # not used at the moment
if(scree == T){
make_scree(pca)
}
if(eigen == T){
make_eigen(pca)
}
make_pca(pca_df, szn, yr, site)
}
## [1] "Eigenvectors (Loadings):"
## PC1 PC2 PC3 PC4 PC5
## groundtemp -0.4928330 0.32647984 0.2262052 -0.73802810 -0.23382095
## airtemp 0.1813779 0.92475594 0.1208391 0.30639388 0.05872997
## windspeed -0.4159177 0.16935761 -0.8878946 0.09246864 -0.03772554
## solar 0.5187891 0.09353516 -0.3080646 -0.57275961 0.54694762
## vwc 0.5311196 0.02839957 -0.2257621 -0.15758584 -0.80081436
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.0272627 1.0188230 0.5644705 0.2162281 0.1732157
#==================
## Code Graveyard
#==================
#The following code makes a bi-plot, which is similar to the code above, however it is not grouped at all. It is a bit harder to read, but it is very straightforward code for a simpler PCA.
# ### Bi-plots
# dev.new(height=7, width=7)
# biplot(scores[, 1:2], loadings[, 1:2], cex=0.7)
#NOTE: this is our code graveyard for code that can be used to find other PCA results, including k-means clustering. Above, we use clusters of TNHA-North and TNHA-South. Here, we use k-means clustering to group data points together and compare that to the different stations at TNHA. This can be useful, however we are particularly interested in identifying differences between north- and south-facing sensors.
# ### K Means Clustering
# pc_data <- summer_tnha_pca$x
#
# # Select the first two principal components
# pc_to_use <- pc_data[, 1:2]
# # Run k-means for different numbers of clusters
# wcss <- numeric()
# for (k in 1:10) {
# kmeans_result <- kmeans(pc_to_use, centers = k)
# wcss[k] <- kmeans_result$tot.withinss
# }
#
# # Plot the results
# plot(1:10, wcss, type = "b", xlab = "Number of Clusters", ylab = "Within-Cluster Sum of # Squares")
# # We can use 2-3 clusters (where the change in slope drops off)
# # Perform k-means clustering
# kmeans_result <- kmeans(pc_to_use, centers = 3) # Change 'centers' based on your specific # case
# # Add cluster assignments to the data
# clustered_data <- data.frame(pc_to_use, cluster = kmeans_result$cluster)
#
# # Plot the clusters
# ggplot(clustered_data, aes(x = PC1, y = PC2, color = factor(cluster))) +
# geom_point() +
# theme_minimal() +
# labs(title = "Clusters in Principal Component Space")
Find Each Data Point
# #Want to look at clustered data
#
# cluster_sensors = data.frame(clustered_data, summer_tnha$station)
#
# cluster1 = filter(cluster_sensors, cluster == 1)
# cluster2 = filter(cluster_sensors, cluster == 2)
# cluster3 = filter(cluster_sensors, cluster == 3)
#
# cluster_sensors$cluster = as.factor(cluster_sensors$cluster)
#
# ggplot(cluster_sensors, aes(x = summer_tnha.station, fill = cluster)) +
# geom_bar(position = "dodge") +
# theme_minimal() +
# labs(title = "Number of Instances in Each Group",
# x = "Category",
# y = "Count")
#
# p = ggplot(cluster_sensors, aes(x = PC1, y = PC2)) +
# geom_point(aes(color = summer_tnha.station, shape = cluster), size = 2) +
# scale_shape_manual(values = c(16, 17, 18)) + # Set shapes manually
# theme_minimal() +
# labs(title = "Biplot of Eigenvectors and PC Clusters",
# x = "PC1",
# y = "PC2")
# p + geom_segment(data = as.data.frame(eigenvectors), aes(x = 0, y = 0, xend = PC1*4, yend = # PC2*4),
# arrow = arrow(type = "closed", length = unit(0.1, "inches")),
# linewidth = 0.5, color = "black") +
# geom_text(data = as.data.frame(eigenvectors), aes(x = PC1*2.5, y = PC2*5+0.1, label = # rownames(eigenvectors)))
#
#