#setwd("C:/Users/Fiona/Downloads/Ali/")
library(here)
library(dplyr)
library(reshape2)
library(ggplot2)
library(DescTools)
library(knitr)
library(dplyr)
library(lubridate)
library(pracma)
##### Adding appropriate date-time values to each of the acoustic datasets

# Raw acoustic files, one CSV per site. `pattern` is a regular expression (not
# a glob), so the extension is escaped and anchored to the end of the name.
file_name <- list.files(here::here("Data", "Raw_data"), pattern = "\\.csv$")

# Site code = file name with underscores and the ".csv" extension removed.
# The dot is escaped so that only the real extension is stripped.
f_sites <- gsub("_|\\.csv$", "", file_name)

f_df <- data.frame(file_name, f_sites)

# Survey start/end dates and times per site.
# NOTE(review): read relative to the working directory, unlike the raw data
# above, which is located with here::here() - confirm where this file lives.
all_sites <- read.csv("surveyTimeAndDate_FSamend.csv")

# Inner join: keeps only sites that have both a raw file and a survey record.
sites_f <- merge(f_df, all_sites, by.x = "f_sites", by.y = "SiteCode")

# Dates are parsed as dd-mm-yy and combined with the times into POSIXct
# values in the session's local time zone.
sites_f$start_datetime <- as.POSIXct(
  paste(sites_f$Start_Date, sites_f$Start_Time, sep = " "),
  format = "%d-%m-%y %H:%M:%OS"
)
sites_f$end_datetime <- as.POSIXct(
  paste(sites_f$End_Date, sites_f$End_Time, sep = " "),
  format = "%d-%m-%y %H:%M:%OS"
)
sr <- 0.524 # one record every 0.524 seconds


# Time-stamp one site's acoustic records and summarise them in 30-minute
# windows.
#
# Reads the site's raw CSV from Data/Raw_data, assigns each record a timestamp
# from a regular sequence (one every `sr` seconds between the survey start and
# end), and writes two files to Data/Processed_data_c:
#   <site>_acoustic_datetime_check.csv      every record plus its window means
#   <site>_acoustic_datetime_30min_avg.csv  unique rows of columns 4 onwards
#
# site_name: site code, matched against sites_f$f_sites.
# Returns the per-record data frame with `datetime`, `site` and `time_30min`
# added and incomplete rows dropped (the window means are not included).
#
# Uses the globals `sites_f` (site metadata) and `sr` (seconds per record).
site_datetime <- function(site_name) {
  site_info <- sites_f[sites_f$f_sites == site_name, ]

  # Paths go through here::here() so they agree with the directory the raw
  # files were listed from, whatever the current working directory is.
  acoustic <- read.csv(
    here::here("Data", "Raw_data", as.character(site_info$file_name[1]))
  )

  time_seq <- seq(
    from = site_info$start_datetime[1],
    to = site_info$end_datetime[1],
    by = sr
  )

  # Remove timestamps falling in the 29th or 59th minute of an hour. A logical
  # mask is used because x[-which(cond)] returns an empty vector when nothing
  # matches.
  in_down_time <- lubridate::minute(time_seq) %in% c(29, 59)
  time_seq_sub <- time_seq[!in_down_time]

  # Pair records with timestamps by position. If there are more records than
  # timestamps the surplus get NA and are dropped by complete.cases() below.
  df_sub <- acoustic
  df_sub$datetime <- time_seq_sub[seq_len(nrow(acoustic))]
  df_sub$site <- rep(site_name, nrow(acoustic))

  df_sub <- df_sub[complete.cases(df_sub), ]

  # Window label. The text is formatted explicitly: as.factor() on a POSIXct
  # goes through as.character(), whose output is R-version dependent (newer
  # versions write midnight as a bare date), and a column mixing both forms is
  # re-parsed with every time set to 00:00.
  window_start <- lubridate::round_date(df_sub$datetime, "30 minutes")
  df_sub$time_30min <- as.factor(format(window_start, "%Y-%m-%d %H:%M:%S"))

  df_sub_avg <- df_sub %>%
    group_by(time_30min) %>%
    mutate(anthrop_30 = mean(anthrop), biotic_30 = mean(biotic)) %>%
    ungroup()

  # Time-of-day columns are derived from the POSIXct window start rather than
  # from the text label. `time_day` keeps its original form (clock time placed
  # on the date the script is run); minutes are taken from the clock fields so
  # they do not depend on that date.
  window_lt <- as.POSIXlt(window_start)
  df_sub_avg$time_day <- as.POSIXct(
    format(window_start, "%H:%M:%S"),
    format = "%H:%M:%OS"
  )
  df_sub_avg$day <- format(window_start, "%d")
  df_sub_avg$minutes_after_midnight <- window_lt$hour * 60 + window_lt$min

  out_dir <- here::here("Data", "Processed_data_c")

  write.csv(
    df_sub_avg,
    file.path(out_dir, paste0(site_name, "_acoustic_datetime_check.csv")),
    row.names = FALSE
  )

  # NOTE(review): positional selection - this assumes the first three columns
  # are the per-record ones (e.g. anthrop, biotic, datetime) so that unique()
  # leaves one row per window; confirm against the raw file layout.
  df_sub_avg_30 <- unique(df_sub_avg[, 4:ncol(df_sub_avg)])

  write.csv(
    df_sub_avg_30,
    file.path(out_dir, paste0(site_name, "_acoustic_datetime_30min_avg.csv")),
    row.names = FALSE
  )

  return(df_sub)
}

# Process every site; the per-site CSVs are written as a side effect.
out <- lapply(sites_f$f_sites, site_datetime)

suntimes <- read.csv(
  here::here("Data/suntimes_summary_allsites.csv"),
  stringsAsFactors = FALSE
)

# Earliest/latest sunrise and sunset reduced to their clock time and placed on
# the date the script is run, in GMT.
sr_min <- as.POSIXct(strftime(suntimes$min_sunrise, format = "%H:%M:%S"), format = "%H:%M:%S", tz = "GMT")
sr_max <- as.POSIXct(strftime(suntimes$max_sunrise, format = "%H:%M:%S"), format = "%H:%M:%S", tz = "GMT")

ss_min <- as.POSIXct(strftime(suntimes$min_sunset, format = "%H:%M:%S"), format = "%H:%M:%S", tz = "GMT")
ss_max <- as.POSIXct(strftime(suntimes$max_sunset, format = "%H:%M:%S"), format = "%H:%M:%S", tz = "GMT")

# Midpoint of two GMT clock times, returned as that clock time on today's date
# in the local time zone (the form the rest of the script compares against).
# strftime() is given tz = "GMT" explicitly: without it the GMT midpoint would
# be printed in the local zone and shifted by the local UTC offset.
mean_clock_time <- function(earliest, latest) {
  midpoint <- as.POSIXct(
    (as.numeric(earliest) + as.numeric(latest)) / 2,
    origin = "1970-01-01",
    tz = "GMT"
  )
  as.POSIXct(
    strftime(midpoint, format = "%H:%M:%OS", tz = "GMT"),
    format = "%H:%M:%OS"
  )
}

suntimes$mean_sunrise <- mean_clock_time(sr_min, sr_max)
suntimes$mean_sunset <- mean_clock_time(ss_min, ss_max)

midnight_today <- as.POSIXct("00:00:00", format = "%H:%M:%S")

suntimes$sr_mins_after_midnight <- as.numeric(
  difftime(suntimes$mean_sunrise, midnight_today, units = "min")
)
suntimes$ss_mins_after_midnight <- as.numeric(
  difftime(suntimes$mean_sunset, midnight_today, units = "min")
)

# 30-minute summary files written by site_datetime(). The pattern is a regular
# expression, so the dot is escaped and the match anchored to the end.
files_avg <- list.files(
  here::here("Data", "Processed_data_c"),
  pattern = "_30min_avg\\.csv$"
)

# Whole-day acoustic features for one processed 30-minute file.
#
# file: file name inside Data/Processed_data_c (as listed in `files_avg`).
# Returns a tibble grouped by site and day with one row per site/day holding:
#   steepest_anth / steepest_bio            largest increase between
#                                           consecutive 30-minute values
#   steepest_time_anth / steepest_time_bio  window at the start of that increase
#   auc_anthro / auc_bio / auc_an_min_bio   spline AUC over minutes after
#                                           midnight, and their difference
#   n_peaks_anth / n_peaks_bio              rows returned by findpeaks() with
#                                           threshold = 0.5 (NA when none)
#   peak_anth / peak_bio                    daily maximum
#   peak_anth_time / peak_bio_time          window of the first daily maximum
#
# Only increases are considered for the "steepest" features (diff() is not
# made absolute).
feature_extract <- function(file) {
  all_data <- read.csv(here::here("Data", "Processed_data_c", file))

  # Largest step between consecutive values; NA when there are fewer than two
  # values (max() of an empty vector would give -Inf with a warning).
  steepest_rise <- function(x) {
    steps <- diff(x)
    if (length(steps) == 0L) NA_real_ else max(steps)
  }

  # Position where the largest step starts; NA when there is no step at all.
  steepest_index <- function(x) {
    which.max(diff(x))[1]
  }

  # findpeaks() returns NULL when it finds nothing; call it once per series.
  count_peaks <- function(x) {
    peaks <- findpeaks(x, threshold = 0.5)
    if (is.null(peaks)) NA_integer_ else nrow(peaks)
  }

  all_data %>%
    group_by(site, day) %>%
    mutate(
      steepest_anth = steepest_rise(anthrop_30),
      steepest_bio = steepest_rise(biotic_30),
      steepest_time_anth = time_30min[steepest_index(anthrop_30)],
      steepest_time_bio = time_30min[steepest_index(biotic_30)],
      auc_anthro = AUC(minutes_after_midnight, anthrop_30, method = "spline"),
      auc_bio = AUC(minutes_after_midnight, biotic_30, method = "spline"),
      n_peaks_anth = count_peaks(anthrop_30),
      n_peaks_bio = count_peaks(biotic_30),
      auc_an_min_bio = auc_anthro - auc_bio,
      peak_anth = max(anthrop_30),
      peak_bio = max(biotic_30),
      # which.max() picks the first maximum, so a tied or flat series still
      # yields exactly one time per site/day; [1] turns "no maximum" into NA.
      peak_anth_time = time_30min[which.max(anthrop_30)][1],
      peak_bio_time = time_30min[which.max(biotic_30)][1]
    ) %>%
    select(
      site, day, steepest_anth, steepest_bio, steepest_time_anth,
      steepest_time_bio, auc_anthro, auc_bio, n_peaks_anth, n_peaks_bio,
      auc_an_min_bio, peak_anth, peak_bio, peak_anth_time, peak_bio_time
    ) %>%
    distinct()
}


# Whole-day features for every processed file except the last one listed,
# stacked into one table; `column_label` records the position of the source
# file in the list.
# NOTE(review): the last file is skipped by the index below, and the index
# misbehaves when fewer than two files are found - confirm this is intended.
fe_out <- lapply(
  files_avg[seq(1, length(files_avg) - 1)],
  feature_extract
)

df <- dplyr::bind_rows(fe_out, .id = "column_label")

#write.csv(df, "example_feature_extraction.csv", row.names = FALSE)

#saveRDS(df, "example_feature_extraction.RDS")

Dawn/Dusk specific feature extraction

Need to make slope an absolute value - currently only pulling out the steepest positive slopes, i.e. slopes of rapidly increasing volume.

library(forcats)

# Dawn features for one processed 30-minute file: the same kind of summaries
# as the whole-day extraction, restricted to the windows within 60 minutes
# either side of the site's mean sunrise.
#
# file: file name inside Data/Processed_data_c (as listed in `files_avg`).
# Returns a tibble grouped by site and day with one row per site/day:
# steepest_dawn_anth, steepest_dawn_bio, auc_dawn_anthro, auc_dawn_bio,
# dawn_auc_an_min_bio, peak_dawn_anth, peak_dawn_bio.
#
# Uses the global `suntimes` (columns site_id, sr_mins_after_midnight).
dawn_feature_extract <- function(file) {
  all_data <- read.csv(here::here("Data", "Processed_data_c", file))

  # Each processed file holds a single site, so one sunrise value is expected.
  # Taking the first match keeps the comparison below scalar instead of
  # silently recycling several values along the rows.
  sunrise <- suntimes$sr_mins_after_midnight[
    suntimes$site_id %in% all_data$site
  ][1]
  if (is.na(sunrise)) {
    warning("No sunrise time found for ", file, call. = FALSE)
  }

  mins <- all_data$minutes_after_midnight
  in_window <- !is.na(sunrise) & mins >= sunrise - 60 & mins <= sunrise + 60
  # which() drops NA comparisons, which would otherwise add all-NA rows.
  sr_data <- all_data[which(in_window), ]

  sr_data %>%
    # One row per 30-minute window, mirroring the dusk extraction.
    select(
      site, time_30min, anthrop_30, biotic_30, time_day, day,
      minutes_after_midnight
    ) %>%
    distinct() %>%
    group_by(site, day) %>%
    mutate(
      steepest_dawn_anth = max(diff(anthrop_30)),
      steepest_dawn_bio = max(diff(biotic_30)),
      auc_dawn_anthro = AUC(
        minutes_after_midnight, anthrop_30, method = "spline"
      ),
      auc_dawn_bio = AUC(minutes_after_midnight, biotic_30, method = "spline"),
      dawn_auc_an_min_bio = auc_dawn_anthro - auc_dawn_bio,
      peak_dawn_anth = max(anthrop_30),
      peak_dawn_bio = max(biotic_30)
    ) %>%
    select(
      site, day, steepest_dawn_anth, steepest_dawn_bio, auc_dawn_anthro,
      auc_dawn_bio, dawn_auc_an_min_bio, peak_dawn_anth, peak_dawn_bio
    ) %>%
    distinct()
}

# Dawn features for every processed file except the last one listed.
# NOTE(review): the index skips the last file and misbehaves when fewer than
# two files are found - confirm this is intended.
dawn_out <- lapply(files_avg[1:(length(files_avg) - 1)], dawn_feature_extract)

dawn_df <- bind_rows(dawn_out, .id = "column_label")

# Write the dawn table itself; the previous call saved the whole-day table
# `df` under the dawn file name.
write.csv(dawn_df, "dawn_example_feature_extraction.csv", row.names = FALSE)
# Dusk features for one processed 30-minute file: the same kind of summaries
# as the whole-day extraction, restricted to the windows within 60 minutes
# either side of the site's mean sunset.
#
# file: file name inside Data/Processed_data_c (as listed in `files_avg`).
# Returns a tibble grouped by site and day with one row per site/day:
# steepest_dusk_anth, steepest_dusk_bio, auc_dusk_anthro, auc_dusk_bio,
# dusk_auc_an_min_bio, peak_dusk_anth, peak_dusk_bio.
#
# Uses the global `suntimes` (columns site_id, ss_mins_after_midnight).
dusk_feature_extract <- function(file) {
  all_data <- read.csv(here::here("Data", "Processed_data_c", file))

  # Each processed file holds a single site, so one sunset value is expected.
  # Taking the first match keeps the comparison below scalar instead of
  # silently recycling several values along the rows.
  sunset <- suntimes$ss_mins_after_midnight[
    suntimes$site_id %in% all_data$site
  ][1]
  if (is.na(sunset)) {
    warning("No sunset time found for ", file, call. = FALSE)
  }

  mins <- all_data$minutes_after_midnight
  in_window <- !is.na(sunset) & mins >= sunset - 60 & mins <= sunset + 60
  # which() drops NA comparisons, which would otherwise add all-NA rows.
  ss_data <- all_data[which(in_window), ]

  ss_data %>%
    # One row per 30-minute window before the per-day summaries.
    select(
      site, time_30min, anthrop_30, biotic_30, time_day, day,
      minutes_after_midnight
    ) %>%
    distinct() %>%
    group_by(site, day) %>%
    mutate(
      steepest_dusk_anth = max(diff(anthrop_30)),
      steepest_dusk_bio = max(diff(biotic_30)),
      auc_dusk_anthro = AUC(
        minutes_after_midnight, anthrop_30, method = "spline"
      ),
      auc_dusk_bio = AUC(minutes_after_midnight, biotic_30, method = "spline"),
      dusk_auc_an_min_bio = auc_dusk_anthro - auc_dusk_bio,
      peak_dusk_anth = max(anthrop_30),
      peak_dusk_bio = max(biotic_30)
    ) %>%
    select(
      site, day, steepest_dusk_anth, steepest_dusk_bio, auc_dusk_anthro,
      auc_dusk_bio, dusk_auc_an_min_bio, peak_dusk_anth, peak_dusk_bio
    ) %>%
    distinct()
}

# Dusk features for every processed file except the last one listed.
# NOTE(review): the index skips the last file and misbehaves when fewer than
# two files are found - confirm this is intended.
dusk_out <- lapply(files_avg[1:(length(files_avg) - 1)], dusk_feature_extract)

dusk_df <- bind_rows(dusk_out, .id = "column_label")

# Write the dusk table itself; the previous call saved the whole-day table
# `df` under the dusk file name.
write.csv(dusk_df, "dusk_example_feature_extraction.csv", row.names = FALSE)
#saveRDS(df, "dusk_example_feature_extraction.RDS")

# Combine whole-day, dawn and dusk features; only site/days present in all
# three tables survive the two inner joins.
df_dawn <- merge(df, dawn_df, by = c("site", "day"))

df_all <- merge(df_dawn, dusk_df, by = c("site", "day"))

# Drop the file-position labels carried over from bind_rows(). A logical mask
# is used because x[, -grep(...)] drops every column when nothing matches.
df_all <- df_all[, !grepl("column_label", colnames(df_all))]

#write.csv(df_all, "all_days_feature_extractions.csv", row.names = FALSE)
saveRDS(df_all, "all_days_feature_extractions.RDS")

Formatting output for PCA

# Site-level covariates.
# NOTE(review): absolute, machine-specific directory - adjust before running
# elsewhere.
gi_dir <- "D:/Fiona/Green_Infrastructure"
xy <- read.csv(file.path(gi_dir, "allSitesDetailsCoordinates.csv"))
hab <- read.csv(file.path(gi_dir, "allSitesHabitat.csv"))
area <- read.csv(file.path(gi_dir, "siteAreaAndPerimeter.csv"))
temp <- read.csv(file.path(gi_dir, "temperature.csv"))


df <- readRDS("all_days_feature_extractions.RDS")
#head(df)

# Attach coordinates, habitat, area/perimeter and temperature to each
# site/day; `id` (site_day) is the key shared with the PCA row names below.
df_xy <- merge(df, xy, by.x = "site", by.y = "SiteCode")
df_hab <- merge(df_xy, hab, by.x = "site", by.y = "Site")
df_area <- merge(
  df_hab,
  area[, c("SiteCode", "Area", "Perimeter")],
  by.x = "site",
  by.y = "SiteCode"
)
df_area$edge <- df_area$Area / df_area$Perimeter
df_all <- merge(df_area, temp, by.x = "site", by.y = "Site")
df_all$id <- paste(df_all$site, df_all$day, sep = "_")

# PCA input: every column after site and day, one row per site_day.
df_mat <- data.frame(df[, 3:ncol(df)])
row.names(df_mat) <- paste(df$site, df$day, sep = "_")

# Clock time of a date-time value as minutes after midnight. Read from the
# hour/minute/second fields directly, so the result does not depend on the
# date (or a daylight-saving change on the date) the script is run.
to_minutes_after_midnight <- function(x) {
  lt <- as.POSIXlt(x)
  lt$hour * 60 + lt$min + floor(lt$sec) / 60
}

time_cols <- c(
  "steepest_time_anth", "steepest_time_bio",
  "peak_anth_time", "peak_bio_time"
)
df_mat[time_cols] <- lapply(df_mat[time_cols], to_minutes_after_midnight)

# Days on which no peak was found count as zero peaks.
df_mat$n_peaks_anth[is.na(df_mat$n_peaks_anth)] <- 0
df_mat$n_peaks_bio[is.na(df_mat$n_peaks_bio)] <- 0

# max(diff()) is -Inf when a dusk window holds a single value; treat as flat.
df_mat$steepest_dusk_anth[is.infinite(df_mat$steepest_dusk_anth)] <- 0
df_mat$steepest_dusk_bio[is.infinite(df_mat$steepest_dusk_bio)] <- 0

# `scale.` is spelled out in full rather than relying on partial matching.
# The three *_an_min_bio columns are exact differences of two other columns,
# so the last three components are expected to have (numerically) zero
# variance.
pca_out <- prcomp(df_mat, scale. = TRUE)
biplot(pca_out, scale = 0, cex = 0.6)

pca_out
## Standard deviations (1, .., p=27):
##  [1] 3.269943e+00 1.936018e+00 1.378591e+00 1.234103e+00 1.109111e+00
##  [6] 9.887042e-01 9.682621e-01 9.187245e-01 8.991384e-01 8.274537e-01
## [11] 8.007679e-01 7.342839e-01 7.108128e-01 6.736970e-01 6.179815e-01
## [16] 5.637408e-01 4.211666e-01 4.019312e-01 3.197424e-01 3.078731e-01
## [21] 2.836405e-01 2.708327e-01 2.522906e-01 2.465105e-01 5.069991e-16
## [26] 4.209748e-16 3.695482e-16
## 
## Rotation (n x k) = (27 x 27):
##                              PC1         PC2          PC3          PC4
## steepest_anth       -0.094626046  0.01525340 -0.500781463  0.199737026
## steepest_bio        -0.233782636  0.19038270  0.027862956  0.012078005
## steepest_time_anth  -0.006036160  0.04834983 -0.082920798 -0.294146639
## steepest_time_bio    0.023353590  0.04203523 -0.241099099 -0.529024483
## auc_anthro           0.249479037  0.27272771  0.047496232  0.026104548
## auc_bio             -0.239737264  0.25230762  0.104537700  0.022733801
## n_peaks_anth        -0.055140633 -0.02610232 -0.497000286  0.172941088
## n_peaks_bio         -0.198609131  0.20022965  0.062628379  0.031717018
## auc_an_min_bio       0.295523388  0.07584487 -0.015915163  0.007940248
## peak_anth            0.208359559  0.27488847 -0.193552022  0.114598354
## peak_bio            -0.246629885  0.21122123  0.022607431  0.010013908
## peak_anth_time       0.002847515  0.03895935  0.008757495 -0.095631847
## peak_bio_time        0.017011483  0.05583535 -0.190216250 -0.536222902
## steepest_dawn_anth  -0.061878788  0.01336648 -0.304795244  0.304079377
## steepest_dawn_bio   -0.140997066  0.14033010  0.031152407  0.164763327
## auc_dawn_anthro      0.224250613  0.28837484  0.089788929 -0.047581492
## auc_dawn_bio        -0.213776302  0.21099927  0.149005913  0.083572470
## dawn_auc_an_min_bio  0.279772230  0.09151848 -0.017497079 -0.080458623
## peak_dawn_anth       0.206155959  0.31470088 -0.066560943  0.072650745
## peak_dawn_bio       -0.230553978  0.21779780  0.132696419  0.101285918
## steepest_dusk_anth  -0.081631375  0.01977615 -0.377108359  0.069727718
## steepest_dusk_bio   -0.126682262  0.17774690 -0.211628763 -0.201078804
## auc_dusk_anthro      0.231228603  0.27788087  0.052187724  0.059718318
## auc_dusk_bio        -0.190481006  0.28308839  0.035138086 -0.095598345
## dusk_auc_an_min_bio  0.282087852  0.04820706  0.019292508  0.099208817
## peak_dusk_anth       0.224276613  0.29150137 -0.069627184  0.096363615
## peak_dusk_bio       -0.213956384  0.26645718 -0.046685400 -0.153239160
##                               PC5          PC6          PC7          PC8
## steepest_anth        0.0299681097 -0.112007268  0.141907035 -0.207612409
## steepest_bio        -0.0485485081 -0.048143067  0.066967746  0.070248990
## steepest_time_anth   0.5252978764  0.414896103 -0.368853999 -0.204567315
## steepest_time_bio   -0.2271096171  0.215219267  0.064088692  0.122560507
## auc_anthro           0.0005078567  0.016041702  0.001392713  0.017389853
## auc_bio              0.0404645790  0.011971092  0.067438583 -0.033582250
## n_peaks_anth        -0.0067398981  0.075009911  0.225326941 -0.454069162
## n_peaks_bio         -0.0562729342 -0.131517453  0.016811624  0.096664862
## auc_an_min_bio      -0.0190805707  0.005840266 -0.031404995  0.028707914
## peak_anth            0.0157430988 -0.005959360  0.079309016 -0.179070541
## peak_bio            -0.0050339026 -0.048831975  0.099145783  0.062690590
## peak_anth_time       0.5706938295  0.181477126  0.664462878  0.337566827
## peak_bio_time       -0.3493782205  0.148852363  0.136138120 -0.065161949
## steepest_dawn_anth  -0.2228097554  0.240429781  0.009853703  0.554311990
## steepest_dawn_bio   -0.2284059286  0.528897542 -0.060419474  0.102821258
## auc_dawn_anthro      0.0231204138 -0.027078946 -0.081508204 -0.023108885
## auc_dawn_bio         0.0317939246  0.205120412 -0.088203580 -0.199085944
## dawn_auc_an_min_bio -0.0008757406 -0.132773309 -0.010097282  0.093241283
## peak_dawn_anth      -0.0320000176  0.041741220 -0.114112392  0.029448860
## peak_dawn_bio       -0.0626514695  0.251909729 -0.080563681 -0.097937668
## steepest_dusk_anth   0.3103063652 -0.051513721 -0.450339530  0.271786511
## steepest_dusk_bio    0.0238056185 -0.352284790 -0.216350137  0.245466373
## auc_dusk_anthro     -0.0363415449 -0.008130320  0.075745646 -0.021128726
## auc_dusk_bio         0.0437421191 -0.201485396  0.084070236 -0.097786525
## dusk_auc_an_min_bio -0.0521754601  0.108320401  0.009218627  0.039648024
## peak_dusk_anth       0.0673524272  0.011080652  0.034355594  0.016183404
## peak_dusk_bio        0.0180685599 -0.208221640  0.031511841 -0.001394382
##                              PC9         PC10          PC11         PC12
## steepest_anth        0.117794161 -0.257289290  0.1873047661 -0.255246584
## steepest_bio         0.295677030  0.156289433  0.1940698715 -0.159976500
## steepest_time_anth  -0.231676635  0.231462946  0.2489368959 -0.281488643
## steepest_time_bio    0.114116752 -0.447913177 -0.2868098458 -0.427804730
## auc_anthro           0.022122451  0.006972558 -0.0057254751  0.025697858
## auc_bio             -0.009114205 -0.104505836  0.0931313137 -0.013358637
## n_peaks_anth        -0.016104329  0.306309302 -0.2447743014  0.093943596
## n_peaks_bio          0.316142641  0.274448280  0.1693547332 -0.258905476
## auc_an_min_bio       0.020368717  0.055265576 -0.0488975948  0.024992647
## peak_anth           -0.041183745 -0.028082232  0.0009548062  0.004136355
## peak_bio             0.206607249  0.035359550  0.1761676025 -0.123017198
## peak_anth_time       0.065521180  0.010428624 -0.1265870521  0.153064720
## peak_bio_time        0.097852098  0.173524168  0.3663243892  0.531427412
## steepest_dawn_anth  -0.476797935 -0.039344304  0.2654327878 -0.011158820
## steepest_dawn_bio    0.122878427  0.364869962 -0.2732002961 -0.065016509
## auc_dawn_anthro      0.061917247 -0.054670462  0.0324158919  0.048099163
## auc_dawn_bio        -0.006650633 -0.344041702 -0.1453597678  0.283631732
## dawn_auc_an_min_bio  0.048332640  0.150501869  0.1036306828 -0.121891110
## peak_dawn_anth      -0.055868990 -0.072682078  0.0829006023  0.048329000
## peak_dawn_bio        0.036299889 -0.167800162 -0.1561850252  0.114365365
## steepest_dusk_anth   0.399522982 -0.175773181  0.0328755985  0.343039315
## steepest_dusk_bio   -0.134695017  0.287457833 -0.5236759538  0.056462403
## auc_dusk_anthro     -0.048498299  0.001395329 -0.0387478228 -0.059286745
## auc_dusk_bio        -0.359187754 -0.082572592  0.0870263770  0.039908432
## dusk_auc_an_min_bio  0.167528559  0.047947093 -0.0785687803 -0.067254756
## peak_dusk_anth       0.052115762 -0.044393160 -0.0066192549  0.022789618
## peak_dusk_bio       -0.282063398  0.054813546 -0.0467965369 -0.017027429
##                             PC13         PC14         PC15         PC16
## steepest_anth       -0.354692087  0.406673657 -0.311789737  0.025504369
## steepest_bio         0.144454935  0.076575115  0.200253997 -0.429185383
## steepest_time_anth   0.114872189  0.089920674 -0.071626492 -0.028826574
## steepest_time_bio    0.087199504 -0.178381826  0.131719028  0.051541190
## auc_anthro           0.050116289 -0.031337494 -0.010049317 -0.059659434
## auc_bio              0.080626121 -0.028464493  0.021275880 -0.113677631
## n_peaks_anth         0.253134800 -0.219912878  0.376223897  0.059550284
## n_peaks_bio          0.225582785 -0.039774131 -0.107158024  0.732853500
## auc_an_min_bio      -0.002529492 -0.008967989 -0.017488213  0.011517291
## peak_anth            0.039074758  0.022218164 -0.114064076 -0.058823992
## peak_bio             0.085967469 -0.017324874  0.132176033 -0.382743483
## peak_anth_time      -0.067932581  0.104242591  0.018506855  0.092131770
## peak_bio_time        0.002509331  0.115890924 -0.180049770  0.016388603
## steepest_dawn_anth   0.233296349  0.042601842  0.083735771  0.041405749
## steepest_dawn_bio   -0.549971082 -0.107805928 -0.120275718 -0.041766277
## auc_dawn_anthro     -0.169780848  0.202893809  0.376443680  0.113851491
## auc_dawn_bio         0.172223417  0.227677236  0.019228863  0.111637812
## dawn_auc_an_min_bio -0.217542919  0.020653429  0.260913731  0.020489166
## peak_dawn_anth      -0.133778331  0.196802327  0.374016472  0.139782091
## peak_dawn_bio        0.105169145  0.140207174 -0.030449485  0.091782597
## steepest_dusk_anth  -0.076957596 -0.352196667  0.001034609  0.005822512
## steepest_dusk_bio    0.119949633  0.410857249 -0.130103825 -0.085940958
## auc_dusk_anthro      0.151521550 -0.187041577 -0.342333283 -0.086620857
## auc_dusk_bio        -0.240906711 -0.364993256 -0.040554378  0.081514714
## dusk_auc_an_min_bio  0.250781272  0.066629966 -0.234429460 -0.111442827
## peak_dusk_anth       0.113502974 -0.222159246 -0.239259848 -0.064477654
## peak_dusk_bio       -0.162054936 -0.178434599 -0.051581991 -0.017491327
##                             PC17         PC18         PC19         PC20
## steepest_anth       -0.163446447  0.062854197 -0.048892602 -0.008870189
## steepest_bio         0.065428426  0.425060391 -0.109229354 -0.307122868
## steepest_time_anth  -0.027831059  0.023787596 -0.017824208  0.008549388
## steepest_time_bio    0.035990800 -0.004149008 -0.024547165 -0.035542351
## auc_anthro           0.169015347 -0.002525537 -0.249544880 -0.071067954
## auc_bio             -0.064680838 -0.720205569 -0.126070765 -0.139687281
## n_peaks_anth        -0.165697693 -0.010671269 -0.033460332 -0.003259830
## n_peaks_bio          0.076259046  0.003365369  0.007484643 -0.043175614
## auc_an_min_bio       0.153237048  0.344311458 -0.119760628  0.015772567
## peak_anth            0.763048587 -0.155522253 -0.052559263 -0.004571518
## peak_bio            -0.023782227 -0.089633313  0.116649826  0.372026715
## peak_anth_time       0.010693979  0.041027736 -0.007678549 -0.001228696
## peak_bio_time       -0.040816441 -0.030174112 -0.002512019  0.025951866
## steepest_dawn_anth  -0.012308821  0.036050515 -0.058669779 -0.027069345
## steepest_dawn_bio    0.016510508 -0.062961972 -0.003518880 -0.117027368
## auc_dawn_anthro     -0.188761733  0.007195740 -0.080588609 -0.080511941
## auc_dawn_bio        -0.038386692  0.151093709  0.087061740 -0.397116014
## dawn_auc_an_min_bio -0.114961935 -0.078224295 -0.106193038  0.161163206
## peak_dawn_anth      -0.081256875 -0.069635183  0.285693126  0.013345080
## peak_dawn_bio        0.007500450  0.194460481 -0.150117777  0.658458116
## steepest_dusk_anth   0.009091359 -0.019294443 -0.062335763 -0.045767044
## steepest_dusk_bio   -0.049607425 -0.076306532 -0.132399800  0.032774236
## auc_dusk_anthro     -0.370005982  0.038330265 -0.163355965 -0.139794821
## auc_dusk_bio        -0.115310385  0.170329218 -0.427800277  0.004809889
## dusk_auc_an_min_bio -0.212783385 -0.067912201  0.120115178 -0.107868942
## peak_dusk_anth      -0.169783063  0.048641283  0.346357320  0.236113784
## peak_dusk_bio        0.092690323  0.148373526  0.613328740 -0.100119229
##                             PC21         PC22         PC23          PC24
## steepest_anth       -0.129185719  0.037555776  0.053234023 -0.0048848372
## steepest_bio         0.160511570 -0.206531672  0.274759820  0.1811600766
## steepest_time_anth  -0.003180634  0.024310199 -0.013653198 -0.0058369827
## steepest_time_bio    0.027857466  0.017216131  0.007652210  0.0004574666
## auc_anthro          -0.522628981  0.255852752  0.299212947 -0.1719339126
## auc_bio             -0.179394623 -0.065678058  0.282568847  0.1142538420
## n_peaks_anth        -0.087427672  0.005379010  0.001000338 -0.0031337882
## n_peaks_bio          0.005230193  0.058618532 -0.036857990 -0.0294502734
## auc_an_min_bio      -0.291496301  0.216475518  0.080442507 -0.1791715493
## peak_anth            0.255728127 -0.163113269 -0.245905196  0.0954356079
## peak_bio            -0.040247357  0.359893737 -0.457569210 -0.3389624762
## peak_anth_time       0.021319986 -0.054941070  0.004828499 -0.0405310736
## peak_bio_time        0.005239073 -0.007533978  0.010498026  0.0023343263
## steepest_dawn_anth  -0.070239451  0.023122951 -0.093809966  0.1072019216
## steepest_dawn_bio    0.047638983  0.084331935 -0.037630691  0.0068938316
## auc_dawn_anthro     -0.120460958 -0.001396583 -0.351643210  0.3755410409
## auc_dawn_bio         0.010440289  0.258634238 -0.225522056  0.0364949427
## dawn_auc_an_min_bio -0.092652478 -0.143792059 -0.129136431  0.2507305381
## peak_dawn_anth       0.355877711 -0.097975473  0.326755177 -0.5120200393
## peak_dawn_bio       -0.132687855 -0.383059960  0.128752272  0.0785269550
## steepest_dusk_anth  -0.050820633 -0.130088782 -0.055713598 -0.0440293319
## steepest_dusk_bio    0.108841444  0.102248838  0.021568679  0.0069413280
## auc_dusk_anthro      0.080488975 -0.291653802 -0.214400747 -0.2092324465
## auc_dusk_bio         0.241956198  0.082557952 -0.023290772 -0.0862300667
## dusk_auc_an_min_bio -0.076886458 -0.266236873 -0.148018725 -0.1083849200
## peak_dusk_anth       0.244058007  0.398433683  0.273665805  0.4642083339
## peak_dusk_bio       -0.414377831 -0.274414984 -0.051166312  0.0184268352
##                              PC25          PC26          PC27
## steepest_anth        0.000000e+00 -5.503995e-17  0.000000e+00
## steepest_bio        -2.939159e-16  1.807830e-16  1.047638e-16
## steepest_time_anth  -7.104541e-17  8.374250e-17 -1.265754e-16
## steepest_time_bio    1.986042e-16 -2.451116e-16  1.673156e-16
## auc_anthro           3.938333e-01 -3.763582e-01 -3.403002e-02
## auc_bio             -2.618986e-01  2.502776e-01  2.262991e-02
## n_peaks_anth         7.833443e-18  5.849717e-17  3.027276e-17
## n_peaks_bio         -2.480637e-17  3.188391e-18  7.113060e-17
## auc_an_min_bio      -5.449315e-01  5.207519e-01  4.708599e-02
## peak_anth           -2.504940e-16  2.872028e-16 -8.103486e-17
## peak_bio            -1.250237e-16  2.060501e-16 -8.985396e-17
## peak_anth_time      -7.947731e-18  1.095479e-16  1.145265e-16
## peak_bio_time       -7.062803e-18  7.809979e-18  5.971103e-17
## steepest_dawn_anth   5.616323e-17  6.211261e-17  8.227522e-17
## steepest_dawn_bio    3.535624e-17 -6.879728e-17 -2.170507e-17
## auc_dawn_anthro     -1.690329e-02 -6.557516e-02  5.296109e-01
## auc_dawn_bio         1.293749e-02  5.019011e-02 -4.053551e-01
## dawn_auc_an_min_bio  2.343443e-02  9.091226e-02 -7.342432e-01
## peak_dawn_anth       4.007930e-16 -2.176188e-16  4.232041e-17
## peak_dawn_bio       -3.705679e-17  1.865976e-17  1.149107e-17
## steepest_dusk_anth  -2.936650e-19  2.077346e-16 -4.540286e-17
## steepest_dusk_bio   -1.742642e-16 -2.936681e-16  2.083095e-17
## auc_dusk_anthro      3.785424e-01  3.906530e-01  6.045145e-02
## auc_dusk_bio        -2.858677e-01 -2.950133e-01 -4.565173e-02
## dusk_auc_an_min_bio -5.033261e-01 -5.194289e-01 -8.037883e-02
## peak_dusk_anth       1.000736e-16 -2.442371e-16  4.435739e-17
## peak_dusk_bio        1.246344e-16 -2.438887e-16 -9.846171e-17
# Standard deviation and (cumulative) proportion of variance per component.
summary(pca_out)
## Importance of components:
##                          PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     3.270 1.9360 1.37859 1.23410 1.10911 0.98870
## Proportion of Variance 0.396 0.1388 0.07039 0.05641 0.04556 0.03621
## Cumulative Proportion  0.396 0.5348 0.60523 0.66164 0.70720 0.74340
##                            PC7     PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.96826 0.91872 0.89914 0.82745 0.80077 0.73428
## Proportion of Variance 0.03472 0.03126 0.02994 0.02536 0.02375 0.01997
## Cumulative Proportion  0.77813 0.80939 0.83933 0.86469 0.88844 0.90841
##                           PC13    PC14    PC15    PC16    PC17    PC18
## Standard deviation     0.71081 0.67370 0.61798 0.56374 0.42117 0.40193
## Proportion of Variance 0.01871 0.01681 0.01414 0.01177 0.00657 0.00598
## Cumulative Proportion  0.92712 0.94393 0.95808 0.96985 0.97642 0.98240
##                           PC19    PC20    PC21    PC22    PC23    PC24
## Standard deviation     0.31974 0.30787 0.28364 0.27083 0.25229 0.24651
## Proportion of Variance 0.00379 0.00351 0.00298 0.00272 0.00236 0.00225
## Cumulative Proportion  0.98618 0.98970 0.99268 0.99539 0.99775 1.00000
##                            PC25     PC26      PC27
## Standard deviation     5.07e-16 4.21e-16 3.695e-16
## Proportion of Variance 0.00e+00 0.00e+00 0.000e+00
## Cumulative Proportion  1.00e+00 1.00e+00 1.000e+00
# Component scores for every site_day, keyed by the PCA row names and joined
# onto the site covariates held in df_all.
pca_df <- cbind(
  as.data.frame(pca_out$x),
  id = rownames(pca_out$x),
  stringsAsFactors = FALSE
)

pca_df <- merge(x = df_all, y = pca_df, by = "id")

K-Means Clustering

# Centre and scale every feature column, then split the site_days into two
# k-means clusters (best of 15 random starts).
df_scale <- scale(x = df_mat)

km_out <- kmeans(x = df_scale, centers = 2, nstart = 15)

# Scatter of the scaled matrix (plot() uses its first two columns), coloured
# by cluster membership.
plot(df_scale, col = km_out$cluster)

Hierarchical Clustering

# Complete-linkage hierarchical clustering on the distances between the scaled
# feature rows, shown as a dendrogram.
hc.complete <- hclust(d = dist(df_scale), method = "complete")
plot(hc.complete)

# Cut the tree into three groups and key the assignments by site_day so they
# can be joined to the PCA scores and site covariates.
hc.cut <- cutree(hc.complete, k = 3)

hc_df <- data.frame(cluster = hc.cut)
hc_df[["id"]] <- rownames(hc_df)
df_clus <- merge(x = pca_df, y = hc_df, by = "id")

# Clusters in geographic space (jittered so co-located site_days separate).
ggplot(
  df_clus,
  aes(x = Long, y = Lat, group = cluster, shape = Site_Type, colour = as.factor(cluster))
) +
  geom_jitter(size = 2, width = 0.01, height = 0.01)

# First component against site diversity.
ggplot(df_clus, aes(x = PC1, y = siteDiversity, colour = as.factor(cluster))) +
  geom_point()

# First three components against site type.
ggplot(df_clus, aes(x = PC1, y = Site_Type, colour = as.factor(cluster))) +
  geom_point()

ggplot(df_clus, aes(x = PC2, y = Site_Type, colour = as.factor(cluster))) +
  geom_point()

ggplot(df_clus, aes(x = PC3, y = Site_Type, colour = as.factor(cluster))) +
  geom_point()

# First component against maximum temperature.
ggplot(df_clus, aes(x = PC1, y = maxTemp, colour = as.factor(cluster))) +
  geom_point()