========

SETUP

========

FILEPATH:

filepath = "/Users/emvanmetre/Desktop/Arctic/X-Y_Plots/data/processed/Combined/"

PICK DATE RANGES:

years = c("2022", "2023")
seasons = c("Spring", "Summer","Fall")
sites = c("TNHA", "BEO")

# put season values for the season that has the start of the data
date_start = "2022-06-01" # data starts in June 2022 (YEAR-MO-DY) where day is always 01
# put season values for the season that has the last of the data
date_end = "2023-11-01" # data ends after November of 2023 (will get data up UNTIL date_end not after)

PICK OUTPUT:

scree = F # scree plot
eigen = T # eigenvectors and eigenvalues

Dates (for selecting date ranges; no need to edit)

spring_months = c("March", "April", "May")
summer_months = c("June","July","August")
fall_months = c("September", "October", "November")
winter_months = c("December", "January", "February")
spring_dates = data.frame(months=spring_months, start=c("-03-01","-04-01","-05-01"), end=c("-04-01","-05-01","-06-01"))

summer_dates = data.frame(months=summer_months, start=c("-06-01","-07-01","-08-01"), end=c("-07-01","-08-01","-09-01"))

fall_dates = data.frame(months=fall_months, start=c("-09-01","-10-01","-11-01"), end=c("-10-01","-11-01","-12-01"))

winter_dates = data.frame(months=winter_months, start=c("-12-01","-01-01","-02-01"), end=c("-01-01","-02-01","-03-01"))
all_dates = data.frame(matrix(nrow = 0, ncol = 4))
for(yur in years){
  # spring
  if("Spring" %in% seasons){
    for(i in c(1:3)){
      curdate = as.Date(paste0(yur, spring_dates[i, 2]))
      if(curdate >= date_start && curdate < date_end){
        all_dates <- rbind(all_dates, c(spring_dates[i,1], paste0(yur, spring_dates[i, 2]), paste0(yur, spring_dates[i, 3]),"Spring"))
      }
    }
  }
  if("Summer" %in% seasons){
    for(i in c(1:3)){
      curdate = as.Date(paste0(yur, summer_dates[i, 2]))
      if(curdate >= date_start && curdate < date_end){
        all_dates <- rbind(all_dates, c(summer_dates[i,1], paste0(yur, summer_dates[i, 2]), paste0(yur, summer_dates[i, 3]),"Summer"))
      }
    }
  }
  if("Fall" %in% seasons){
    for(i in c(1:3)){
      curdate = as.Date(paste0(yur, fall_dates[i, 2]))
      if(curdate >= date_start && curdate < date_end){
        all_dates <- rbind(all_dates, c(fall_dates[i,1], paste0(yur, fall_dates[i, 2]), paste0(yur, fall_dates[i, 3]),"Fall"))
      }
    }
  }
  if("Winter" %in% seasons){
    for(i in c(1:3)){
      curdate = as.Date(paste0(yur, winter_dates[i, 2]))
      if(curdate >= date_start && curdate < date_end){
        all_dates <- rbind(all_dates, c(winter_dates[i,1], paste0(yur, winter_dates[i, 2]), paste0(yur, winter_dates[i, 3]),"Winter"))
      }
    }
  }
  
}
colnames(all_dates) = c("months","start","end","szn")

Packages

library(dplyr)
library(lubridate)
library(tidyverse)

=========================

DEFINE FUNCTIONS

=========================

Temporal Range: Season Vertical Spatial Range: 30-45 cm Horizontal Spatial Range: stations across site (TNHA, SSMH, BEO) –> Average Total Site –> North vs South (except for BEO)

Filter by Site and Join Tables

pick_site <- function(cursite){
  gtfile = paste0("GroundTemperature_",szn,yr,"_DAILY.csv")
  airfile = paste0("AirTemperature_",szn,yr,"_DAILY.csv")
  vwcfile = paste0("VWC_",szn,yr,"_DAILY.csv")
  solfile = paste0("Solar_",szn,yr,"_DAILY.csv")
  windspeed = paste0("WindSpeed_",szn,yr,"_DAILY.csv")
  winddir = paste0("WindDirection_",szn,yr,"_DAILY.csv")
  
  grndtmp <<- read.csv(paste0(filepath, gtfile))
  airtmp <<- read.csv(paste0(filepath, airfile))
  vwc <<- read.csv(paste0(filepath, vwcfile))
  solar <<- read.csv(paste0(filepath, solfile))
  wind <<- read.csv(paste0(filepath, windspeed))
  
  pca_ground = grndtmp %>% filter(site == cursite) %>% filter(depth == "30cm")
  pca_air = airtmp %>% filter(site == cursite)
  pca_wind = wind %>% filter(site == cursite)
  pca_solar = solar %>% filter(site == cursite)
  pca_vwc = vwc %>% filter(site==cursite) %>% filter(depth == "30-45cm")
  
  big_df <<- full_join(pca_ground, pca_air, by=c("Date", "site", "station", "fullname")) %>% 
    select(Date, site, station, fullname, avg.x, avg.y) %>% rename("groundtemp"="avg.x", "airtemp"="avg.y")
  big_df <<- full_join(big_df, pca_wind, by=c("Date", "site", "station", "fullname")) %>% 
    select(Date, site, station, fullname, groundtemp, airtemp, avg) %>% rename("windspeed" = avg)
  big_df <<- full_join(big_df, pca_solar, by=c("Date", "site", "station", "fullname")) %>% 
    select(Date, site, station, fullname, groundtemp, airtemp, windspeed, avg)  %>% rename("solar" = avg)
  big_df <<- full_join(big_df, pca_vwc, by=c("Date", "site", "station", "fullname")) %>% 
    select(Date, site, station, fullname, groundtemp, airtemp, windspeed, solar, avg)  %>% rename("vwc" = avg)
  
  if(szn == "Winter"){
    big_df <<- big_df %>% select(-solar)
  }
  return(big_df)
}

Filter by Date Range

pick_dates <- function(datemin, datemax, big_df){
  pca_df <<- big_df %>% filter(Date >= datemin) %>% filter(Date < datemax)
  
  # get rid of NAs
  pca_df <<- na.omit(pca_df)
  pca_df <<- unique(pca_df)
  return(pca_df)
}

Calculate PCA

calc_pca <- function(pca_df){
  pca <<- prcomp(pca_df[,5:ncol(pca_df)], center=TRUE, scale.=TRUE)

  #take out variables
  sd <- pca$sdev
  loads <<- pca$rotation
  rownames(loads) <<- colnames(pca_df[5:ncol(pca_df)])
  scores <<- pca$x
  
  var <- sd^2
  varPercent <- var/sum(var) * 100
  
  return(list("pca"=pca, "loads"=loads))
}

Make Scree Plot

make_scree <- function(pca){
  sd <- pca$sdev
  
  var <- sd^2
  varPercent <- var/sum(var) * 100
  
  barplot(varPercent, xlab="PC", ylab="Percent Variance", names.arg=1:length(varPercent), 
          las=1, ylim=c(0, max(varPercent)), col="gray")
  abline(h=1/ncol(pca_df[5:ncol(pca_df)])*100, col="red")
}

Display Eigenvectors and Eigenvalues

make_eigen <- function(pca){
  eigenvectors <- pca$rotation
  print("Eigenvectors (Loadings):")
  print(eigenvectors)
  
  print("Loadings Cutoff:")
  sqrt(1/ncol(pca_df[5:ncol(pca_df)])) # cutoff for "important" loadings
  
  # Access the eigenvalues (variances of the principal components)
  eigenvalues <- (pca$sdev)^2
  print("Eigenvalues:")
  print(eigenvalues)
}

===============

PCA PLOTS

===============

make_pca <- function(pca_df, szn, yr, site){
  if(site == "TNHA"){
    SOUTH <<- pca_df$fullname == "TNHA-SA"
    NORTH <<- pca_df$fullname == "TNHA-SC"
    s <- "TNHA-SA"
    n <- "TNHA-SC"
  } else{
    if(site == "SSMH"){
      SOUTH <<- pca_df$fullname == "SSMH-SA"
      NORTH <<- pca_df$fullname == "SSMH-SB"
      s <- "SSMH-SA"
      n <- "SSMH-SB"
    } else {
      SOUTH <<- pca_df$fullname == "BEO-BASE"
      n <- "BEO"
    }
  }
  
    scaling <- 2
  textNudge <- 1.1
  limNudge <- 1.3
  
  xlimit <- seq(floor(min(scores[,1])*limNudge),ceiling(max(scores[,1])*limNudge), 1)
  ylimit <- seq(floor(min(scores[,2])*limNudge),ceiling(max(scores[,2])*limNudge), 1)
  
  plot(scores[, 1], scores[, 2], xlab="PCA 1", ylab="PCA 2", type="n", asp=1, 
       las=1, xaxt='n', yaxt='n')
  
  axis(side = 1, at=xlimit)
  axis(side = 2, at=ylimit)
  
  title(paste0(szn, " ", yr," Principal Component Analysis: ",site," North v. South\n",format(as.Date(min(pca_df$Date)), format="%B %d %Y")," - ",format(as.Date(max(pca_df$Date)), format="%B %d %Y")), adj=0.5)
  
 
   
  points(scores[SOUTH, 1], scores[SOUTH, 2], pch=16, cex=0.7, col="mediumturquoise")
   
  if(site != "BEO"){
    points(scores[NORTH, 1], scores[NORTH, 2], pch=16, cex=0.7, col="salmon")
     legend(x = "topright",          # Position
       legend = c(paste0(s, " (south)"), paste0(n, " (north)")),  # Legend texts
       col = c("mediumturquoise","salmon"),
       pch = 19)  #colors
  
  } else{
    legend(x = "topright",          # Position
       legend = "BEO",  # Legend texts
       col = "mediumturquoise",
       pch = 19) 
    
  }
  
   
  
  arrows(0, 0, loads[, 1]* scaling, loads[, 2]* scaling, length=0.1, angle=20, col="darkred")
   
  text(loads[1, 1]*scaling*textNudge, loads[1, 2]*scaling*textNudge, rownames(loads)[1],   col="darkred", cex=0.7) # ground label
  
  text(loads[2, 1]*scaling*textNudge, loads[2, 2]*scaling*textNudge+0.2, rownames(loads)[2],   col="darkred", cex=0.7) # air label
  
  if(nrow(loads) > 2){
    text(loads[3, 1]*scaling*textNudge, loads[3, 2]*scaling*textNudge, rownames(loads)[3],   col="darkred", cex=0.7) # wind label
  
    if(nrow(loads)>3){
      text(loads[4, 1]*scaling*textNudge-0.2, loads[4, 2]*scaling*textNudge, rownames(loads)[4],   col="darkred", cex=0.7) # solar label
      
      if(nrow(loads)>4){
  
  text(loads[5, 1]*scaling*textNudge, loads[5, 2]*scaling*textNudge, rownames(loads)[5],   col="darkred", cex=0.7) # vwc label
    }

    }
  
  
  }
  
  
 
  #text(-3, 1]*scaling*textNudge, 1, "TNHA-SA \n(south)", col="mediumturquoise")
  #text(1, 1, "TNHA-SC \n(north)", col="salmon")
}
for(i in c(1:nrow(all_dates))){
  
  month <- all_dates$months[i]
  startdate <- all_dates$start[i]
  enddate <- all_dates$end[i]
  szn <<- all_dates$szn[i]
  yr <<- substr(all_dates$start[i], 1, 4)
  
  for(site in sites){
    big_df <- pick_site(site)
    pca_df <- pick_dates(startdate, enddate, big_df)
    
    if(nrow(pca_df > 0)){
      p <- calc_pca(pca_df)
      pca <- p$pca
      loads <- p$loads
      if(scree == T){
        make_scree(pca)
      }
      if(eigen == T){
        make_eigen(pca)
      }
      make_pca(pca_df, szn, yr, site)
              
    }
  }
  
}
## [1] "Eigenvectors (Loadings):"
##                  PC1        PC2         PC3        PC4         PC5
## groundtemp 0.5476145  0.4045920 -0.03918486 -0.0580378 -0.72905408
## airtemp    0.3724533 -0.4286214  0.66305490 -0.4856979  0.04492273
## windspeed  0.3248552 -0.5207118 -0.73361839 -0.2912401  0.01765196
## solar      0.4449430 -0.3442438  0.11958841  0.8148957  0.07187135
## vwc        0.5078305  0.5127810 -0.07953252 -0.1088734  0.67895877
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.57644726 1.07455295 0.72798280 0.52963666 0.09138032

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3        PC4         PC5
## groundtemp -0.4860508 -0.4226322 0.07325217 -0.7465687  0.14968635
## airtemp    -0.5022990 -0.1458409 0.41191297  0.5508986  0.50325392
## windspeed   0.3542183  0.1812267 0.88077177 -0.2541337 -0.03665269
## solar      -0.5394409  0.1417582 0.19212997  0.1300206 -0.79692465
## vwc         0.3081936 -0.8643922 0.11085571  0.2401179 -0.29647458
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6295396 0.9295922 0.7937236 0.3626124 0.2845322

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4        PC5
## groundtemp  0.3592753 -0.63344711 -0.25652697  0.3370505 -0.5387549
## airtemp     0.4467041  0.08230121  0.78786113  0.4078843  0.0811617
## windspeed  -0.1582638 -0.76345645  0.28783441 -0.3192084  0.4553506
## solar       0.5757197  0.08961316  0.08803741 -0.7698935 -0.2450098
## vwc        -0.5611382 -0.03278706  0.47209059 -0.1593657 -0.6601378
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1504093 1.3267213 0.7880669 0.4249392 0.3098633

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3         PC4        PC5
## groundtemp -0.5252442 -0.03932148  0.21139271  0.47524974  0.6723267
## airtemp    -0.4367753 -0.48698738  0.57352415 -0.09309700 -0.4842248
## windspeed   0.3767501  0.48249405  0.78169155 -0.04663773  0.1097366
## solar      -0.4593501  0.28559019 -0.03647871 -0.80565266  0.2388072
## vwc         0.4247385 -0.66853186  0.11836723 -0.33796352  0.4944009
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.6493396 0.8693144 0.6830215 0.5466514 0.2516731

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4        PC5
## groundtemp  0.5372070 -0.2682398  0.2704804  0.00573965  0.7525048
## airtemp     0.4802015 -0.1533955 -0.2337970 -0.77241157 -0.3075641
## windspeed  -0.2732646 -0.7710632 -0.5532303  0.10359450  0.1182893
## solar       0.4083375  0.4344661 -0.7185460  0.33766224  0.1190609
## vwc        -0.4892873  0.3481637 -0.2231750 -0.52782555  0.5576496
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.7981110 1.0390136 0.5430929 0.4820706 0.1377119

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3        PC4        PC5
## groundtemp -0.5404616  0.2176303 -0.02372722  0.7658419 -0.2710379
## airtemp    -0.5649587  0.3237355 -0.01395118 -0.2358723  0.7212397
## windspeed   0.2916657  0.2458451 -0.89683997  0.1626742  0.1539691
## solar      -0.5284697 -0.1760414 -0.40162623 -0.5164223 -0.5115991
## vwc         0.1561213  0.8697137  0.18334893 -0.2543533 -0.3477236
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3391180 1.1371182 0.8903281 0.4207271 0.2127086

## [1] "Eigenvectors (Loadings):"
##                   PC1          PC2         PC3        PC4         PC5
## groundtemp -0.5321448  0.002237055  0.03624474 -0.8411960 -0.08883964
## airtemp    -0.5478684  0.143017918 -0.14729444  0.2594716  0.76835211
## windspeed   0.1802992  0.778330875  0.57880984 -0.1006336  0.12862853
## solar      -0.4248978 -0.372082405  0.73321750  0.3206699 -0.20144342
## vwc        -0.4512385  0.485073813 -0.32305098  0.3348245 -0.58704156
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.8938153 1.1994165 0.5210513 0.2526733 0.1330435

## [1] "Eigenvectors (Loadings):"
##                  PC1        PC2         PC3        PC4        PC5
## groundtemp 0.4662444  0.3879798 -0.20869428 -0.7258858 -0.2482424
## airtemp    0.7429921 -0.0737595  0.04527871  0.2094164  0.6297753
## windspeed  0.1306295  0.5528159 -0.49148052  0.6083399 -0.2563200
## solar      0.4072216 -0.6594882 -0.13310129  0.1520747 -0.5986679
## vwc        0.2183699  0.3217159  0.83374306  0.1898166 -0.3430097
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.4410752 1.3323563 1.0319179 0.7900802 0.4045703

## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2         PC3         PC4         PC5
## groundtemp -0.56672861  0.05840738 -0.25411763 -0.23517491  0.74533500
## airtemp    -0.48783723 -0.34589749 -0.05059914  0.79212154 -0.11114382
## windspeed   0.02783611 -0.83519168  0.43800241 -0.29974728  0.14136994
## solar      -0.36011428  0.41798772  0.83374092  0.01137321 -0.01872672
## vwc        -0.55711432 -0.06844428 -0.21422867 -0.47671631 -0.64170604
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.88262567 1.23324777 0.59077857 0.26221529 0.03113271

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4         PC5
## groundtemp -0.5645711 -0.24798478  0.13263297 -0.4221279  0.65113711
## airtemp    -0.1168635 -0.69311362 -0.67221898  0.2147269 -0.08916520
## windspeed   0.2027445 -0.63099638  0.70258390  0.2556534 -0.04189767
## solar      -0.5182810  0.23929893  0.08071101  0.8038413  0.14644369
## vwc        -0.5982323 -0.05173645  0.17433228 -0.2533395 -0.73815233
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.62660827 1.31058436 0.70599845 0.33159551 0.02521341

## [1] "Eigenvectors (Loadings):"
##                  PC1         PC2         PC3        PC4        PC5
## groundtemp 0.6673474 -0.03451116 -0.21848210  0.3963036  0.5904790
## airtemp    0.4661829 -0.50753216 -0.08636693  0.1661623 -0.7000110
## windspeed  0.1248567 -0.59631953  0.54721588 -0.4639113  0.3378682
## solar      0.4964658  0.42038665 -0.19363992 -0.7241383 -0.1221641
## vwc        0.2743236  0.45705379  0.77965678  0.2752156 -0.1795552
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9234517 1.8872209 0.7107573 0.3113126 0.1672575

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3         PC4         PC5
## groundtemp  0.17658559  0.6965595 -0.3722293  0.44161857  0.38735104
## airtemp    -0.34312984  0.5197945 -0.1826984 -0.75500432 -0.09308803
## windspeed   0.05194032  0.4400026  0.8933571  0.06722204 -0.03307792
## solar      -0.63230526  0.1379988 -0.0888431  0.47642493 -0.58844931
## vwc        -0.66975496 -0.1788088  0.1486158  0.05866852  0.70279944
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.8998175 1.3616674 0.9122898 0.6605397 0.1656856

## [1] "Eigenvectors (Loadings):"
##                  PC1         PC2          PC3        PC4         PC5
## groundtemp 0.6193954  0.03807649  0.012384938  0.1155681  0.77549348
## airtemp    0.5071285  0.39487159  0.140932827  0.5546232 -0.50934090
## windspeed  0.1292896  0.72920686 -0.009407754 -0.6707723 -0.03895668
## solar      0.3890054 -0.45400647  0.644283279 -0.4138480 -0.23702710
## vwc        0.4371831 -0.32366973 -0.751528511 -0.2404818 -0.28545094
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0659846 1.4011772 0.7138162 0.4885925 0.3304295

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4         PC5
## groundtemp -0.4928330 0.32647984  0.2262052 -0.73802810 -0.23382095
## airtemp     0.1813779 0.92475594  0.1208391  0.30639388  0.05872997
## windspeed  -0.4159177 0.16935761 -0.8878946  0.09246864 -0.03772554
## solar       0.5187891 0.09353516 -0.3080646 -0.57275961  0.54694762
## vwc         0.5311196 0.02839957 -0.2257621 -0.15758584 -0.80081436
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.0272627 1.0188230 0.5644705 0.2162281 0.1732157

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3        PC4         PC5
## groundtemp -0.5596809  0.24384971 -0.07162882 -0.4802551 -0.62571472
## airtemp    -0.4431436  0.34174682  0.48931076  0.6677492 -0.03897118
## windspeed  -0.3476091 -0.48844324 -0.62673199  0.4687732 -0.16748078
## solar      -0.1466919 -0.76489778  0.59703192 -0.1503673 -0.11981412
## vwc        -0.5899410 -0.01005067 -0.07876663 -0.2847935  0.75137015
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.65004402 1.03512242 0.92354650 0.30827202 0.08301504

## [1] "Eigenvectors (Loadings):"
##                  PC1        PC2        PC3        PC4           PC5
## groundtemp 0.5455998 -0.1400451 -0.2846963  0.3235778  0.7049493826
## airtemp    0.4398938 -0.4932097  0.3039288 -0.6861956 -0.0007270268
## windspeed  0.3135237  0.7460425 -0.3340572 -0.4831893 -0.0075670654
## solar      0.3297383  0.4032919  0.8064977  0.2787015  0.0226955712
## vwc        0.5493540 -0.1338197 -0.2540520  0.3365807 -0.7088536449
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.007451770 1.026595633 0.752944387 0.209745305 0.003262905

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2         PC3        PC4          PC5
## groundtemp  0.61990805  0.1250119 -0.17192519 -0.1519721  0.739886645
## airtemp     0.56344969 -0.1540129 -0.04590494 -0.5718483 -0.574183574
## windspeed  -0.07508528  0.7637636  0.55491342 -0.3210742 -0.003141396
## solar       0.43928918  0.4511804 -0.17401667  0.6731628 -0.346455370
## vwc        -0.31563536  0.4168375 -0.79380335 -0.3060348 -0.053289629
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2002470 1.1410616 0.8263220 0.6550125 0.1773570

## [1] "Eigenvectors (Loadings):"
##                   PC1          PC2        PC3        PC4        PC5
## groundtemp  0.5196215  0.007862786 -0.2897629  0.3134311  0.7400879
## airtemp     0.4208191  0.424340028  0.6078799  0.4562867 -0.2552083
## windspeed  -0.4310916 -0.048157951 -0.3509705  0.8106417 -0.1775404
## solar       0.3872501 -0.860259823  0.1092734  0.1408964 -0.2796389
## vwc         0.4659423  0.278401807 -0.6414036 -0.1287306 -0.5267069
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.15699912 0.67902325 0.55424707 0.51826593 0.09146463

## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2         PC3        PC4         PC5
## groundtemp  0.1203035  0.5417585  0.80164198 -0.2084616 -0.07706298
## airtemp     0.6208048 -0.1199698 -0.03951987  0.1732720 -0.75407141
## windspeed  -0.5908322  0.1178874  0.15672165  0.6985250 -0.35287533
## solar       0.2383409 -0.6360487  0.52459159  0.3706177  0.35507998
## vwc         0.4407260  0.5231152 -0.23674948  0.5488405  0.41813254
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.9657749 1.3323030 0.8683081 0.4681599 0.3654541

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2        PC3        PC4         PC5
## groundtemp -0.4860841 -0.28612372 -0.3291332  0.7216444  0.22968714
## airtemp    -0.4436313  0.62609827  0.1104331  0.1913845 -0.60197068
## windspeed   0.4341868 -0.02422325  0.6715099  0.5991472 -0.03149752
## solar      -0.5038944  0.19317116  0.5458666 -0.2069348  0.60661606
## vwc         0.3528254  0.69873622 -0.3613585  0.2019950  0.46465038
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.3837450 1.0756403 0.8440733 0.4712047 0.2253367

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3         PC4         PC5
## groundtemp  0.53759638 -0.3087634 -0.2477784 -0.37623916 -0.64242137
## airtemp     0.08451663  0.7672672 -0.2696985  0.39068692 -0.42282912
## windspeed  -0.47497597  0.3797274  0.1594208 -0.75110348 -0.20157763
## solar      -0.46234334 -0.2141868 -0.8586058 -0.01283455  0.05471767
## vwc         0.51427963  0.3548202 -0.3213249 -0.37614713  0.60405526
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.2489345 1.3900058 0.6166531 0.4704407 0.2739660

## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2        PC3        PC4        PC5
## groundtemp -0.62991482  0.18869340  0.1156173 -0.1762302 -0.7233102
## airtemp    -0.58030965  0.14924795  0.3534918  0.5449196  0.4680512
## windspeed  -0.27606931 -0.63268572  0.3437524 -0.5762318  0.2707133
## solar       0.06698399  0.73484862  0.2425975 -0.5503432  0.3062344
## vwc        -0.43098366  0.04273343 -0.8274400 -0.1925742  0.3011398
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.0385921 1.4650650 0.8515792 0.4262608 0.2185030

## [1] "Eigenvectors (Loadings):"
##                   PC1         PC2         PC3         PC4        PC5
## groundtemp -0.4663849 -0.35008194  0.56348282 -0.06648713 -0.5813728
## airtemp    -0.3694393 -0.68409863 -0.61950953  0.02780351  0.1046832
## windspeed  -0.4961673  0.07871949  0.33606342  0.50152339  0.6189967
## solar       0.4727706 -0.30044036  0.04364336  0.78969969 -0.2463599
## vwc        -0.4198876  0.55945556 -0.42877931  0.34591200 -0.4551890
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.4176779 0.9090763 0.2764280 0.2299386 0.1668792

## [1] "Eigenvectors (Loadings):"
##                    PC1         PC2         PC3         PC4         PC5
## groundtemp -0.62472947 -0.05064441  0.03294048 -0.38060246  0.67912070
## airtemp    -0.06767398 -0.61402208 -0.78381623  0.04491696 -0.04485201
## windspeed   0.53527022 -0.15655100  0.02806107 -0.82944528  0.01451492
## solar      -0.43383785 -0.50814591  0.45729923 -0.17837611 -0.55913490
## vwc        -0.36111471  0.58111234 -0.41789597 -0.36513932 -0.47322389
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 2.1797062 1.4459807 0.7219356 0.4933909 0.1589866

## [1] "Eigenvectors (Loadings):"
##                    PC1        PC2        PC3          PC4        PC5
## groundtemp -0.51267387 -0.4415367  0.1410039 -0.631192464 -0.3520295
## airtemp    -0.12870685 -0.5636375  0.5918534  0.534067043  0.1738656
## windspeed   0.01814444  0.5657720  0.6782930  0.002303951 -0.4684935
## solar      -0.60936631  0.1321958 -0.3793249  0.546655283 -0.4104602
## vwc        -0.59071443  0.3870200  0.1608061 -0.132405286  0.6766700
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 1.6968187 1.5118235 0.9666274 0.5022506 0.3224797

If you want to just make one plot (for testing):

  # look at all_dates and pick a row to use (set to i)

i <- 8
site <- "TNHA"

month <- all_dates$months[i]
startdate <- all_dates$start[i]
enddate <- all_dates$end[i]
szn <<- all_dates$szn[i]
yr <<- substr(all_dates$start[i], 1, 4)

big_df <- pick_site(site)
pca_df <- pick_dates(startdate, enddate, big_df)


if(nrow(pca_df > 0)){
    p <- calc_pca(pca_df)
    pca <- p$pca
    loads <- p$loads # not used at the moment
    if(scree == T){
      make_scree(pca)
    }
    if(eigen == T){
      make_eigen(pca)
    }
    make_pca(pca_df, szn, yr, site)
            
  }
## [1] "Eigenvectors (Loadings):"
##                   PC1        PC2        PC3         PC4         PC5
## groundtemp -0.4928330 0.32647984  0.2262052 -0.73802810 -0.23382095
## airtemp     0.1813779 0.92475594  0.1208391  0.30639388  0.05872997
## windspeed  -0.4159177 0.16935761 -0.8878946  0.09246864 -0.03772554
## solar       0.5187891 0.09353516 -0.3080646 -0.57275961  0.54694762
## vwc         0.5311196 0.02839957 -0.2257621 -0.15758584 -0.80081436
## [1] "Loadings Cutoff:"
## [1] "Eigenvalues:"
## [1] 3.0272627 1.0188230 0.5644705 0.2162281 0.1732157
#==================
## Code Graveyard
#==================
#The following code makes a bi-plot, which is similar to the code above, however it is not grouped at all. It is a bit harder to read, but it is very straightforward code for a simpler PCA.
#   ### Bi-plots
#   dev.new(height=7, width=7)
#   biplot(scores[, 1:2], loadings[, 1:2], cex=0.7)
#NOTE: this is our code graveyard for code that can be used to find other PCA results, including k-means clustering. Above, we use clusters of TNHA-North and TNHA-South. Here, we use k-means clustering to group data points together and compare that to the different stations at TNHA. This can be useful, however we are particularly interested in identifying differences between north- and south-facing sensors.
#   ### K Means Clustering
#   pc_data <- summer_tnha_pca$x
#   
#   # Select the first two principal components
#   pc_to_use <- pc_data[, 1:2]
#   # Run k-means for different numbers of clusters
#   wcss <- numeric()
#   for (k in 1:10) {
#     kmeans_result <- kmeans(pc_to_use, centers = k)
#     wcss[k] <- kmeans_result$tot.withinss
#   }
#   
#   # Plot the results
#   plot(1:10, wcss, type = "b", xlab = "Number of Clusters", ylab = "Within-Cluster Sum of #   Squares")
#   # We can use 2-3 clusters (where the change in slope drops off)
#   # Perform k-means clustering
#   kmeans_result <- kmeans(pc_to_use, centers = 3)  # Change 'centers' based on your specific #   case
#   # Add cluster assignments to the data
#   clustered_data <- data.frame(pc_to_use, cluster = kmeans_result$cluster)
#   
#   # Plot the clusters
#   ggplot(clustered_data, aes(x = PC1, y = PC2, color = factor(cluster))) +
#     geom_point() +
#     theme_minimal() +
#     labs(title = "Clusters in Principal Component Space")

Find Each Data Point

#   #Want to look at clustered data
#   
#   cluster_sensors = data.frame(clustered_data, summer_tnha$station)
#   
#   cluster1 = filter(cluster_sensors, cluster == 1)
#   cluster2 = filter(cluster_sensors, cluster == 2)
#   cluster3 = filter(cluster_sensors, cluster == 3)
#   
#   cluster_sensors$cluster = as.factor(cluster_sensors$cluster)
#   
#   ggplot(cluster_sensors, aes(x = summer_tnha.station, fill = cluster)) +
#     geom_bar(position = "dodge") +
#     theme_minimal() +
#     labs(title = "Number of Instances in Each Group",
#          x = "Category",
#          y = "Count")
#   
#   p = ggplot(cluster_sensors, aes(x = PC1, y = PC2)) +
#     geom_point(aes(color = summer_tnha.station, shape = cluster), size = 2) +
#     scale_shape_manual(values = c(16, 17, 18)) +  # Set shapes manually
#     theme_minimal() +
#     labs(title = "Biplot of Eigenvectors and PC Clusters",
#          x = "PC1",
#          y = "PC2") 
#   p + geom_segment(data = as.data.frame(eigenvectors), aes(x = 0, y = 0, xend = PC1*4, yend = #   PC2*4),
#                    arrow = arrow(type = "closed", length = unit(0.1, "inches")),
#                    linewidth = 0.5, color = "black") +
#     geom_text(data = as.data.frame(eigenvectors), aes(x = PC1*2.5, y = PC2*5+0.1, label = #   rownames(eigenvectors)))
#   
#