#####Packages Required:
library(tidyverse)
library(readr)
library(car)
library(sp)
library(sf)
##### Image Selection and Area Measurement 

### Quadrat selection 
#NOTE: set.seed value was arbitrarily set to 42 for inside Drum Lake and 43 for outside Drum Lake
#(subsequent sites had seed values of n+2 on an inside v. outside basis)
set.seed(42)
sample(1:5, 5, replace = TRUE)
  
### Insertion of ROI as csv
#NOTE: code here uses quadrat DL_I_1_1 as an example. 
#Other quadrats will only differ in seed values and use csvs recalled)
setwd("C:/Users/User/OneDrive - UBC/Alec/2023_DrumLake/DL_2023_Measurements/Raw")

### Calculating total area per ROI  
  sum_of_value <- sum(DL_I_1_1_results$Area, na.rm = TRUE)
  sum_of_value
##### Calculating Coverage Difference and Generating Mercator Coordinates

### Get data
setwd("C:/Users/User/OneDrive - UBC/Alec/crust_measurements")
cov_data <- read.csv("honors_data_new.csv")
metadata <- read.csv("RRA_metadata.csv")

### Lat/long data
metadata$Decimal_coordinates_long <- -abs(metadata$Decimal_coordinates_long)
latlong <- metadata %>%
  select(Ref_Area, Decimal_coordinates_lat, Decimal_coordinates_long)
coordinates(latlong) <- ~Decimal_coordinates_long+Decimal_coordinates_lat
proj4string(latlong) <- CRS("+proj=longlat +ellps=GRS80")
latlong_merc <- spTransform(latlong, CRS("+proj=merc +ellps=GRS80"))
merc_coords <- coordinates(latlong_merc)
latlong_df <- data.frame(Ref_Area = latlong_merc$Ref_Area,Mercator_X = merc_coords[, 1],
                         Mercator_Y = merc_coords[, 2])
latlong_df

metadata_new <- metadata %>%
  left_join(latlong_df, by = "Ref_Area")
metadata_new

##### Get change in  coverage per site

### Get mean per RRA + location and convert NAs to zero
mean_cov_data <- cov_data %>%
  group_by(Location, Ref_Area) %>%
  summarise(avg_cov_diff_per_site = mean(replace_na(Crust_Coverage.cm2., 0)))%>%
  ungroup()

### Reorient data from long to wide
sep_rows_cov <- mean_cov_data %>%
  pivot_wider(id_cols = Ref_Area,
              names_from = Location, values_from = avg_cov_diff_per_site)

### Remove sites with no coverage detected
subset_data <- sep_rows_cov %>%
  filter(!(Ref_Area=="Froleck"), !(Ref_Area=="Tunkwa_1993"), !(Ref_Area== "Tunkwa_Old"), !(Ref_Area== "Hamilton_Stipa_Nelson"))
  
### Calculating cov diff (inside-outside)
subset_data <- subset_data %>%
  mutate(cov_diff = subset_data$'1' - subset_data$`2`)

### Test distribution for normality
hist(subset_data$cov_diff)
shapiro.test(subset_data$cov_diff)

### Filter metadata to match subset
metadata_trimmed <- metadata_new %>%
  filter(Ref_Area %in% subset_data$Ref_Area)
metadata_trimmed

### Add cov diff to metadata on a per site basis
all_data <- subset_data %>%
  left_join(metadata_trimmed, by = "Ref_Area")
##### Statistical testing

### Set oldest site as intercept and ensure Ref_Areas are factors
all_data$Ref_Area <- factor(all_data$Ref_Area)
all_data$Ref_Area <- relevel(all_data$Ref_Area, ref = "Drum_Lake")
str(all_data)

### Perform ANOVA
anova_test <- aov(cov_diff ~ Establishment_Year + Elevation.m., all_data)
summary(anova_test)
coefficients(anova_test)
alias(anova_test)

### Graph ANOVA
plot(anova_test, which = 2)
qqPlot(anova_test$residuals, id = FALSE)

### Check ANOVA residuals
residuals(anova_test)
##### Figure Generation

### Elevation figure
ggplot(all_data, aes(x = Elevation.m., y = cov_diff)) +
  geom_point(size = 4) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    x = "Elevation (m)",
    y = "Difference in Biocrust Coverage (Inside - Outside)"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(face = "bold")
  )

### Establishment Year figure
ggplot(all_data, aes(x = factor(Establishment_Year), y = cov_diff)) +
  geom_boxplot() + geom_jitter(width = 0.1, size = 3, alpha = 0.8) +
  labs(x = "Establishment Year", y = "Difference in Biocrust Coverage (Inside - Outside)") +
  theme_minimal() +theme(text = element_text(size = 14),axis.title = element_text(face = "bold"))