Analysis of coral reef effects on reef fish assemblages in Florida

This is an analysis of the NCRMP fish and coral datasets. The purpose of this analysis is to (1) match sites surveyed for both coral and fish within the same year that were less than 25m apart, (2) summarize each dataset to extract the appropriate independent (IVs) and dependent variables (DVs) for analysis (e.g. coral diversity, colony size, weighted rugosity, percent cover, fish species richness, Lmax, functional richness, cross-scale redundancy, etc.), (3) evaluate the effects of the IVs on the DVs using a full subsets approach (FSS) with Generalized Additive Mixed Models (GAMMs) to explore predictive power of models and compare variable importance, (4) and identify the subset of coral reef predictors that have the most influence on coral reef fish assemblages.

library(dplyr) 
library(tidyr)
library(ncrmp.benthics.analysis)
library(rvc)
library(stringr)
library(geosphere)
library(ggplot2)
library(gtsummary)
library(lubridate)
library(tidyverse)
library(FSSgam)
library(mgcv)
library(readxl)
library(vegan)
library(corrplot)
library(car)
library(doBy)
library(gplots)
library(RColorBrewer)

Import data and create keys

# Import RVC Fish dataset to create a key
fish_raw_2016_2023_FL_all = getRvcData(years = 2016:2023, regions = c("FLA KEYS", "DRY TORT", "SEFCRI"))

# Create a key for the fish dataset
fish_2016_2022_FL_key <- fish_raw_2016_2023_FL_all$sample_data %>%
  mutate(STATION_UH = as.factor(case_when(grepl("U",PRIMARY_SAMPLE_UNIT) ~ "U",
                                         grepl("H", PRIMARY_SAMPLE_UNIT) ~ "H")),
         YEAR = case_when((YEAR == 2021 & REGION == "SEFCRI")~ 2020, TRUE ~ YEAR), # Correct sampling year is 2020 for these samples, not 2021 in Southeast Florida
         YEAR = case_when((YEAR == 2023 & REGION == "DRY TORT")~ 2022, TRUE ~ YEAR), # Reassign these to 2022 and explain in text the reason for this (planned sampling year was 2022 but sampling extended into 2023)
         PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[UH]"),
         PRIMARY_SAMPLE_UNIT = str_remove(PRIMARY_SAMPLE_UNIT,"2022_"), # remove 2022_ from Dry Tortugas 2023 dataset PSU
         PRIMARY_SAMPLE_UNIT = str_remove(PRIMARY_SAMPLE_UNIT,"2023_"), # remove 2023_ from Dry Tortugas 2023 dataset PSU
         UNIQUE_ID = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_")) %>%
  dplyr::select(UNIQUE_ID,STATION_NR,LAT_DEGREES,LON_DEGREES,MONTH,DAY,YEAR,STATION_UH,REGION,PRIMARY_SAMPLE_UNIT) %>%
  distinct()

# Create a summary table of the fish dataset key
fish_key_table <- fish_2016_2022_FL_key %>% 
  tbl_summary(by = YEAR, 
              include = c(,REGION, STATION_NR, STATION_UH, MONTH),
              label = list(REGION ~ "Region",
                           STATION_NR ~ "Fish Station Numbers",
                           STATION_UH ~ "Fish Station U or H",
                           MONTH ~ "Sampling Month")) %>%
              modify_header(label = "**Variable**") %>%
              bold_labels() %>%
              modify_caption("**Table 1. Summary of the surveyed sites in the fish dataset from 2016 to 2022 in Florida**")

## Examine the Station U and H designations to understand when they were assigned
# Look at matches between U and H stations
fish_station_H_only <- fish_2016_2022_FL_key %>% filter(STATION_UH == "H")
fish_station_U_only <- fish_2016_2022_FL_key %>% filter(STATION_UH == "U")
fish_station_match_UH <- fish_station_H_only %>% inner_join(fish_station_U_only, by = "UNIQUE_ID") # 50 stations with matching U and H

# Import coral dataset benthic cover
coral_raw_ALL <- NCRMP_benthic_cover_All_regions_years

# Reduce dataset to 2016-2022 in Florida
coral_raw_2016_2022_FL <- coral_raw_ALL %>%
  filter(REGION %in% c("SEFCRI", "FLK", "Tortugas"), YEAR %in% 2016:2023) 

# Create the coral key
coral_2016_2022_FL_key <- coral_raw_2016_2022_FL %>%
  mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI")) %>% 
  mutate(UNIQUE_ID = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_")) %>%
  dplyr::select(UNIQUE_ID,STATION_NR,LAT_DEGREES,LON_DEGREES,MONTH,DAY,YEAR,REGION,PRIMARY_SAMPLE_UNIT) %>%
  distinct() 

# Create a summary table of the coral dataset key
coral_key_table <- coral_2016_2022_FL_key %>% 
  tbl_summary(by = YEAR, 
              include = c(REGION, STATION_NR, MONTH),
              label = list(REGION ~ "Region",
                           STATION_NR ~ "Coral Station Numbers",
                           MONTH ~ "Sampling Month")) %>%
              modify_header(label = "**Variable**") %>%
              bold_labels() %>%
              modify_caption("**Table 2. Summary of the surveyed sites in the coral dataset from 2016 to 2022 in Florida**")

## Join the coral and fish datasets to create a new key
fish_coral_2016_2022_FL_key <- coral_2016_2022_FL_key %>%
  inner_join(fish_2016_2022_FL_key, by = "UNIQUE_ID", suffix = c("_CORAL", "_FISH"))

# Create a summary table of the joined coral and fish dataset keys
fish_coral_key_table <- fish_coral_2016_2022_FL_key %>% 
  tbl_summary(by = YEAR_CORAL, 
              include = c(REGION_FISH, STATION_NR_CORAL, STATION_NR_FISH, STATION_UH, MONTH_CORAL),
              label = list(REGION_FISH ~ "Region",
                           STATION_NR_CORAL ~ "Coral Station Numbers",
                           STATION_NR_FISH ~ "Fish Station Numbers",
                           STATION_UH ~ "Fish Station U or H",
                           MONTH_CORAL ~ "Sampling Month"))%>%
              modify_header(label = "**Variable**") %>%
              bold_labels() %>%
              modify_caption("**Table 3. Summary of all of the matching sites surveyed for both the coral dataset and the fish dataset from 2016 to 2022 in Florida**")

fish_key_table # Fish key dataset summary

**Table 1. Summary of the surveyed sites in the fish dataset from 2016 to 2022 in Florida**
Variable	2016 N = 1,911¹	2018 N = 2,096¹	2020 N = 305¹	2021 N = 299¹	2022 N = 1,253¹
Region
DRY TORT	544 (28%)	672 (32%)	0 (0%)	299 (100%)	306 (24%)
FLA KEYS	797 (42%)	843 (40%)	0 (0%)	0 (0%)	648 (52%)
SEFCRI	570 (30%)	581 (28%)	305 (100%)	0 (0%)	299 (24%)
Fish Station Numbers
1	960 (50%)	1,054 (50%)	305 (100%)	299 (100%)	1,253 (100%)
2	951 (50%)	1,042 (50%)	0 (0%)	0 (0%)	0 (0%)
Fish Station U or H
H	35 (1.8%)	15 (0.7%)	0 (NA%)	0 (NA%)	0 (NA%)
U	1,876 (98%)	2,081 (99%)	0 (NA%)	0 (NA%)	0 (NA%)
Unknown	0	0	305	299	1,253
Sampling Month
1	0 (0%)	0 (0%)	4 (1.3%)	0 (0%)	0 (0%)
5	354 (19%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)
6	537 (28%)	533 (25%)	15 (4.9%)	184 (62%)	267 (21%)
7	149 (7.8%)	481 (23%)	87 (29%)	115 (38%)	404 (32%)
8	298 (16%)	513 (24%)	80 (26%)	0 (0%)	317 (25%)
9	364 (19%)	319 (15%)	59 (19%)	0 (0%)	171 (14%)
10	74 (3.9%)	38 (1.8%)	47 (15%)	0 (0%)	65 (5.2%)
11	127 (6.6%)	150 (7.2%)	7 (2.3%)	0 (0%)	29 (2.3%)
12	8 (0.4%)	62 (3.0%)	6 (2.0%)	0 (0%)	0 (0%)
¹ n (%)

coral_key_table # coral key dataset summary

**Table 2. Summary of the surveyed sites in the coral dataset from 2016 to 2022 in Florida**
Variable	2016 N = 335¹	2018 N = 306¹	2020 N = 93¹	2021 N = 146¹	2022 N = 407¹
Region
DRY TORT	115 (34%)	139 (45%)	0 (0%)	129 (88%)	113 (28%)
FLA KEYS	122 (36%)	90 (29%)	0 (0%)	12 (8.2%)	181 (44%)
SEFCRI	98 (29%)	77 (25%)	93 (100%)	5 (3.4%)	113 (28%)
Coral Station Numbers
1	335 (100%)	257 (84%)	93 (100%)	146 (100%)	407 (100%)
2	0 (0%)	49 (16%)	0 (0%)	0 (0%)	0 (0%)
Sampling Month
1	0 (0%)	0 (0%)	0 (0%)	5 (3.4%)	0 (0%)
5	85 (25%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)
6	61 (18%)	0 (0%)	0 (0%)	0 (0%)	47 (12%)
7	17 (5.1%)	45 (15%)	0 (0%)	0 (0%)	30 (7.4%)
8	25 (7.5%)	123 (40%)	5 (5.4%)	82 (56%)	167 (41%)
9	90 (27%)	51 (17%)	52 (56%)	59 (40%)	136 (33%)
10	14 (4.2%)	57 (19%)	22 (24%)	0 (0%)	19 (4.7%)
11	11 (3.3%)	28 (9.2%)	1 (1.1%)	0 (0%)	3 (0.7%)
12	32 (9.6%)	2 (0.7%)	13 (14%)	0 (0%)	5 (1.2%)
¹ n (%)

fish_coral_key_table # Fish and coral key dataset summary

**Table 3. Summary of all of the matching sites surveyed for both the coral dataset and the fish dataset from 2016 to 2022 in Florida**
Variable	2016 N = 596¹	2018 N = 557¹	2020 N = 92¹	2021 N = 81¹	2022 N = 302¹
Region
DRY TORT	196 (33%)	262 (47%)	0 (0%)	81 (100%)	33 (11%)
FLA KEYS	212 (36%)	155 (28%)	0 (0%)	0 (0%)	156 (52%)
SEFCRI	188 (32%)	140 (25%)	92 (100%)	0 (0%)	113 (37%)
Coral Station Numbers
1	596 (100%)	459 (82%)	92 (100%)	81 (100%)	302 (100%)
2	0 (0%)	98 (18%)	0 (0%)	0 (0%)	0 (0%)
Fish Station Numbers
1	298 (50%)	281 (50%)	92 (100%)	81 (100%)	302 (100%)
2	298 (50%)	276 (50%)	0 (0%)	0 (0%)	0 (0%)
Fish Station U or H
H	17 (2.9%)	4 (0.7%)	0 (NA%)	0 (NA%)	0 (NA%)
U	579 (97%)	553 (99%)	0 (NA%)	0 (NA%)	0 (NA%)
Unknown	0	0	92	81	302
Sampling Month
5	142 (24%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)
6	98 (16%)	0 (0%)	0 (0%)	0 (0%)	47 (16%)
7	24 (4.0%)	90 (16%)	0 (0%)	0 (0%)	29 (9.6%)
8	50 (8.4%)	225 (40%)	5 (5.4%)	53 (65%)	156 (52%)
9	172 (29%)	94 (17%)	52 (57%)	28 (35%)	52 (17%)
10	28 (4.7%)	94 (17%)	21 (23%)	0 (0%)	17 (5.6%)
11	20 (3.4%)	50 (9.0%)	1 (1.1%)	0 (0%)	1 (0.3%)
12	62 (10%)	4 (0.7%)	13 (14%)	0 (0%)	0 (0%)
¹ n (%)

FISH DATASET NOTES: There were a total of 5864 fish stations and 1287 coral stations surveyed from 2016-2022 and surveys occurred about every 2 years in Florida. Both coral and fish sampling was reduced in 2020 and continued into 2021 due to the COVID-19 pandemic. Sampling in the Dry Tortugas that was planned for 2022 occurred in 2022 and 2023 due to complications from the COVID-19 pandemic. Sampling in 2023 in the Dry Tortugas was reassigned to 2022 to account for the sampling design.

For the fish surveys in 2016 and 2018, at each Primary Sample Unit (PSU) there are two STATION NRs (1 and 2) indicating that there were 2 diver teams conducting RVCs within the PSU. The size of the PSU in 2016 and 2018 was 100m2 and this was reduced to 50m2 in 2020-2022. Beginning in 2020, only one diver team conducted an RVC within the PSU, so there is only one STATION NR (1) at each PSU in 2020-2022.

All fish stations in 2016 and 2018 were given designations of U or H to indicate that the two diver teams (STATION_NR 1 and 2) at a station were sampling different habitat types. In 2020, the survey design switched to smaller grid sizes and only one diver team was deployed at each station. Most duplicate NR stations in 2016 and 2018 (STATION NR1 and STATION NR2) were both given the station “U” designation. Only 50 stations were given the “H” designation, but these were all matched with a station “U” at each site.

CORAL DATASET NOTES: For the coral surveys, at each PSU from 2016-2022, only one diver team conducted coral surveys, thus there is only one STATION NR (1 or 2) within each PSU. The STATION NR designation was 1 in every year except 2018, when 16% of the surveys were given the STATION NR 2 designation.

COMBINED FISH AND CORAL DATASET NOTES: The fish and coral datasets were combined based on a unique ID that included the Florida region, year, and PSU. There were a total of 1628 matching stations between the fish and coral datasets that included matching region, year and PSU assignments.

Match coral and fish sites

# Find the distance between the fish and coral site coordinates using Vincenty Ellipsoid method and reduce the dataset to include the fish and coral stations that are closest together
fish_coral_2016_2022_FL_key2 <- fish_coral_2016_2022_FL_key %>%
  mutate(VE_DIST = distVincentyEllipsoid(cbind(LAT_DEGREES_CORAL,LON_DEGREES_CORAL),cbind(LAT_DEGREES_FISH,LON_DEGREES_FISH), a=6378137, b=6356752.3142, f=1/298.257223563)) %>%
  group_by(UNIQUE_ID) %>%  ## For sites with multiple fish Station_NR's for each coral station NR
  slice_min(VE_DIST) %>% # select the STATION_NRs that are closest (min dist)
  distinct(UNIQUE_ID, .keep_all = TRUE) # drop duplicated sites (same coordinates at both fish station NRs) 

# Create a grouping vector to identify sites that are <25m, 25-50m, >50m apart
fish_coral_2016_2022_FL_key3 <- fish_coral_2016_2022_FL_key2 %>%
  mutate(DIST_GRP = as.factor(case_when(VE_DIST <= 25.4 ~ '<=25m',VE_DIST <= 50.4 ~ '25-50m', VE_DIST > 50.4 ~ '>50m')))

sites_less_than_25m <- round(sum(fish_coral_2016_2022_FL_key3$DIST_GRP == '<=25m')/dim(fish_coral_2016_2022_FL_key3)[1]*100)   
sites_25m_50m <- round(sum(fish_coral_2016_2022_FL_key3$DIST_GRP == '25-50m')/dim(fish_coral_2016_2022_FL_key3)[1]*100)   
sites_greater_than_50m <- round(sum(fish_coral_2016_2022_FL_key3$DIST_GRP == '>50m')/dim(fish_coral_2016_2022_FL_key3)[1]*100)   

# Create a summary table of the distance between sites
fish_coral_key_dist_table <- fish_coral_2016_2022_FL_key3 %>% 
  tbl_summary(by = DIST_GRP, 
              percent = "row",
              include = c(REGION_CORAL,YEAR_CORAL,STATION_NR_CORAL, STATION_NR_FISH, STATION_UH, MONTH_CORAL),
              label = list(REGION_CORAL ~ "Region",
                           YEAR_CORAL ~ "Year",
                           STATION_NR_CORAL ~ "Coral Station Numbers",
                           STATION_NR_FISH ~ "Fish Station Numbers",
                           STATION_UH ~ "Fish Station U or H",
                           MONTH_CORAL ~ "Month"))%>%
              modify_header(label = "**Variable**") %>%
              bold_labels() %>%
              modify_caption("**Table 4. Summary of the coral and fish sites surveyed that were <25m, 25-50m, and >50m apart from 2016 to 2022 in Florida. Percentages given are for each row in the table.**") 

# Create a key for the coral and fish datasets that only includes sites <25m apart
fish_coral_2016_2022_FL_key4 <- fish_coral_2016_2022_FL_key3 %>%
  filter(DIST_GRP == "<=25m")

# Find the difference between sampling dates at a site
fish_coral_2016_2022_FL_key5 <- fish_coral_2016_2022_FL_key4 %>%
  mutate(DATE_CORAL = make_date(year = YEAR_CORAL, month = MONTH_CORAL, day = DAY_CORAL),
         DATE_FISH = make_date(year = YEAR_FISH, month = MONTH_FISH, day = DAY_FISH),
         DIFF_DAY = abs(difftime(DATE_CORAL, DATE_FISH, units = "days"))) # diff b/w coral and fish sampling dates

# Calculate mean difference between sampling dates overall
mean_diff_days <- round(mean(fish_coral_2016_2022_FL_key5$DIFF_DAY))
mean_diff_days

# Create a summary table of the difference between sites and sampling dates
fish_coral_key_FL_key5_table <- fish_coral_2016_2022_FL_key5 %>% 
  tbl_summary(by = YEAR_CORAL, 
              include = c(REGION_CORAL, STATION_NR_CORAL, STATION_NR_FISH, STATION_UH, MONTH_CORAL, LAT_DEGREES_CORAL, LON_DEGREES_CORAL, 
                          DIFF_DAY),
              statistic = list(c(LAT_DEGREES_CORAL,LON_DEGREES_CORAL) ~ "{min}, {max}", 
                               DIFF_DAY ~ "{mean}, ({sd})"),
              digits = all_continuous() ~ 2,
              label = list(REGION_CORAL ~ "Region",
                           STATION_NR_CORAL ~ "Coral Station Numbers",
                           STATION_NR_FISH ~ "Fish Station Numbers",
                           STATION_UH ~ "Fish Station U or H",
                           MONTH_CORAL ~ "Sampling Month",
                           LAT_DEGREES_CORAL ~ "Latitude (min, max)",
                           LON_DEGREES_CORAL ~ "Longitude (min, max)",
                           DIFF_DAY ~ "Difference between sampling dates")) %>%
              modify_header(label = "**Variable**") %>%
              bold_labels() %>%
              modify_caption("**Table 5. Summary of the coral and fish sites surveyed that were <25m apart from 2016 to 2022 in Florida**")

# Create final coral and fish keys
coral_2016_2022_FL_key_final <- fish_coral_2016_2022_FL_key5 %>%
         dplyr::select(-ends_with("_FISH"),-STATION_UH,-VE_DIST,-DIST_GRP,-DIFF_DAY)

fish_2016_2022_FL_key_final <- fish_coral_2016_2022_FL_key5 %>%
  mutate(PRIMARY_SAMPLE_UNIT = paste(PRIMARY_SAMPLE_UNIT_FISH,STATION_UH,sep = ""),
         PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[NA]"),
         UNIQUE_ID2 = paste(REGION_FISH,YEAR_FISH,PRIMARY_SAMPLE_UNIT,STATION_NR_FISH,sep = "_")) %>%
  dplyr::select(-ends_with("_CORAL"),-VE_DIST,-DIST_GRP,-DIFF_DAY)

fish_coral_key_dist_table # table for fish and coral datasets for sites that <25m, 25-50m, >50m

**Table 4. Summary of the coral and fish sites surveyed that were <25m, 25-50m, and >50m apart from 2016 to 2022 in Florida. Percentages given are for each row in the table.**
Variable	<=25m N = 793¹	25-50m N = 102¹	>50m N = 159¹
Region
DRY TORT	264 (77%)	33 (9.6%)	46 (13%)
FLA KEYS	228 (67%)	40 (12%)	74 (22%)
SEFCRI	301 (82%)	29 (7.9%)	39 (11%)
Year
2016	208 (70%)	23 (7.7%)	67 (22%)
2018	177 (63%)	47 (17%)	57 (20%)
2020	84 (91%)	6 (6.5%)	2 (2.2%)
2021	63 (78%)	10 (12%)	8 (9.9%)
2022	261 (86%)	16 (5.3%)	25 (8.3%)
Coral Station Numbers
1	745 (74%)	101 (10%)	159 (16%)
2	48 (98%)	1 (2.0%)	0 (0%)
Fish Station Numbers
1	568 (77%)	70 (9.5%)	101 (14%)
2	225 (71%)	32 (10%)	58 (18%)
Fish Station U or H
H	10 (91%)	0 (0%)	1 (9.1%)
U	375 (66%)	70 (12%)	123 (22%)
Unknown	408	32	35
Month
5	71 (100%)	0 (0%)	0 (0%)
6	87 (91%)	4 (4.2%)	5 (5.2%)
7	79 (92%)	1 (1.2%)	6 (7.0%)
8	279 (79%)	34 (9.6%)	40 (11%)
9	174 (66%)	37 (14%)	54 (20%)
10	59 (60%)	14 (14%)	26 (26%)
11	25 (66%)	4 (11%)	9 (24%)
12	19 (41%)	8 (17%)	19 (41%)
¹ n (%)

# remove extreme values for plotting purposes
fish_coral_2016_2022_FL_plot <- fish_coral_2016_2022_FL_key3 %>% filter(DIST_GRP != ">50m") %>% mutate(STATION_NR_FISH = as.factor(STATION_NR_FISH))  # remove extreme value >60,000m apart

# Create a histogram to examine the distances between coral and fish stations overall
ggplot(fish_coral_2016_2022_FL_plot) + geom_histogram(aes(x=VE_DIST),fill = "light grey",color="black",bins= 10) + theme_minimal()+ xlab("Distance between fish and coral sites (m)") + ylab("Count") # histogram of distances without station numbers

# Create a histogram to examine the distances between coral and fish stations among yearS
ggplot(fish_coral_2016_2022_FL_plot) + geom_histogram(aes(x=VE_DIST), fill = "light grey",color="black",bins= 10) + facet_wrap(~YEAR_FISH) + theme_minimal()+ xlab("Distance between fish and coral sites (m)") + ylab("Count") # histogram of distances with station numbers

# Create a histogram to examine the distances between coral and fish stations separated by Station NR
ggplot(fish_coral_2016_2022_FL_plot) + geom_histogram(aes(x=VE_DIST, fill = REGION_CORAL), color="black",position = "dodge",bins= 10) +  facet_wrap(~YEAR_FISH) + theme_minimal()+ xlab("Distance between fish and coral sites (m)") + ylab("Count") + labs(fill = "Region") # histogram of distances with station numbers

# Create a histogram to examine the differences between coral and fish stations sampling dates overall
ggplot(fish_coral_2016_2022_FL_key5) + geom_histogram(aes(x=DIFF_DAY),fill = "light grey",color="black",bins= 10) + theme_minimal()+ xlab("Difference between fish and coral site sampling days (days)") + ylab("Count") # histogram of distances without station numbers

# Create a histogram to examine the differences between coral and fish stations sampling dates overall
ggplot(fish_coral_2016_2022_FL_key5) + geom_histogram(aes(x=DIFF_DAY),fill = "light grey",color="black",bins= 10) +  facet_wrap(~YEAR_FISH) + theme_minimal()+ xlab("Difference between fish and coral site sampling days (days)") + ylab("Count") # histogram of distances without station numbers

fish_coral_key_FL_key5_table # final table for fish and coral datasets that are <25m apart

**Table 5. Summary of the coral and fish sites surveyed that were <25m apart from 2016 to 2022 in Florida**
Variable	2016 N = 208¹	2018 N = 177¹	2020 N = 84¹	2021 N = 63¹	2022 N = 261¹
Region
DRY TORT	98 (47%)	96 (54%)	0 (0%)	63 (100%)	7 (2.7%)
FLA KEYS	43 (21%)	43 (24%)	0 (0%)	0 (0%)	142 (54%)
SEFCRI	67 (32%)	38 (21%)	84 (100%)	0 (0%)	112 (43%)
Coral Station Numbers
1	208 (100%)	129 (73%)	84 (100%)	63 (100%)	261 (100%)
2	0 (0%)	48 (27%)	0 (0%)	0 (0%)	0 (0%)
Fish Station Numbers
1	86 (41%)	74 (42%)	84 (100%)	63 (100%)	261 (100%)
2	122 (59%)	103 (58%)	0 (0%)	0 (0%)	0 (0%)
Fish Station U or H
H	8 (3.8%)	2 (1.1%)	0 (NA%)	0 (NA%)	0 (NA%)
U	200 (96%)	175 (99%)	0 (NA%)	0 (NA%)	0 (NA%)
Unknown	0	0	84	63	261
Sampling Month
5	71 (34%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)
6	40 (19%)	0 (0%)	0 (0%)	0 (0%)	47 (18%)
7	5 (2.4%)	45 (25%)	0 (0%)	0 (0%)	29 (11%)
8	17 (8.2%)	65 (37%)	5 (6.0%)	48 (76%)	144 (55%)
9	51 (25%)	31 (18%)	51 (61%)	15 (24%)	26 (10.0%)
10	12 (5.8%)	18 (10%)	15 (18%)	0 (0%)	14 (5.4%)
11	6 (2.9%)	17 (9.6%)	1 (1.2%)	0 (0%)	1 (0.4%)
12	6 (2.9%)	1 (0.6%)	12 (14%)	0 (0%)	0 (0%)
Latitude (min, max)	24.44, 27.12	24.43, 26.94	25.78, 26.92	24.56, 24.72	24.44, 26.90
Longitude (min, max)	-83.10, -80.00	-83.10, -80.01	-80.12, -80.00	-83.10, -82.78	-82.98, -79.99
Difference between sampling dates	27.61 days, (46.30)	33.31 days, (34.62)	36.56 days, (44.26)	65.22 days, (23.62)	5.64 days, (14.55)
¹ n (%); Min, Max; Mean, (SD)

DISTANCES BETWEEN CORAL AND FISH SITES: We calculated the distances between the coral and fish sites because these sites were not always sampled simultaneously. There was a possibility that sampling may have not been conducted within the same PSU if sites were sampled on different days, even if the site was designated with the same PSU number. We used the geosphere package to calculate the Vincenty Ellipsoid distance between coral and fish sites (Hijmans 2022). The Vincenty Ellipsoid distance assumes earth to be an ellipsoid and provides greater accuracy compared to other methods (e.g. Haversine) which assume the earth to be a sphere.

REMOVAL OF SITES BASED ON DISTANCE: The grid size of a PSU that is currently used by NCRMP is 50m2, thus an acceptable distance between stations to still be considered to reside within one PSU (given the size of coral transect and fish RVC) was determined to be 25m. Most of the sites (75%) were separated by 25m or less, but 10% of the sites were between 25-50m apart and 15% of the sites were >50m apart. For 2016 and 2018, the closest fish station NR to a coral station NR was matched within each PSU. There was no bias in the selection of fish STATION NR or STATION_UH when matching to the coral station NRs. The distance between fish and coral sampling locations appeared to decline in 2020-2022 compared to 2016-2018.

DIFFERENCE BETWEEN SAMPLING DATES OF FISH AND CORAL DATASETS: On average, for coral and fish sites there was a difference of 26 days between sampling dates. In 2021, the average days between sampling events was greatest and few sites were sampled simultaneously. In 2022, the average time between sampling events declined to its lowest value.

FINAL MATCHED CORAL AND FISH DATASET: After removing the sites that were >25m apart, a total of 793 sites remained. On average, fish stations NR2 were closer to the coral sites then fish stations NR1. Most matched fish sites from 2016 and 2018 had the station “U” designation while only a few stations with “H” designations matched from 2016-2018. Most of the sampling effort occurred during the summer from Jun-Sept, with some sampling less frequently occurring in May and Oct-Dec.

Calculate the dependent variables

# Create Unique ID from raw fish data and reassign sampling years
sample_data <- fish_raw_2016_2023_FL_all$sample_data %>%
  mutate(YEAR = case_when((YEAR == 2021 & REGION == "SEFCRI")~ 2020, TRUE ~ YEAR), # Correct sampling year is 2020 for these samples, not 2021 in Southeast Florida
         YEAR = case_when((YEAR == 2023 & REGION == "DRY TORT")~ 2022, TRUE ~ YEAR), # Reassign these to 2022 and explain in text the reason for this (planned sampling year was 2022 but sampling extended into 2023)
         PRIMARY_SAMPLE_UNIT = str_remove(PRIMARY_SAMPLE_UNIT,"2022_"), # remove 2022_ from Dry Tortugas 2023 dataset PSU
         PRIMARY_SAMPLE_UNIT = str_remove(PRIMARY_SAMPLE_UNIT,"2023_"), # remove 2023_ from Dry Tortugas 2023 dataset PSU
         UNIQUE_ID2 = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,STATION_NR,sep = "_"))

# Remove unnecessary stations from the raw fish data
sample_data2 <- sample_data %>%
  semi_join(fish_2016_2022_FL_key_final, by = "UNIQUE_ID2") 

# Recombine with strata and taxonomic data
fish_2016_2022_FL_all_2 <- list()
fish_2016_2022_FL_all_2$sample_data <- sample_data2
fish_2016_2022_FL_all_2$stratum_data <- fish_raw_2016_2023_FL_all$stratum_data
fish_2016_2022_FL_all_2$taxonomic_data <- fish_raw_2016_2023_FL_all$taxonomic_data

# Select all species with > 1% occurrence
every_fish_sp <- data.frame(species = c(fish_2016_2022_FL_all_2$taxonomic_data$SPECIES_CD),group = fish_2016_2022_FL_all_2$taxonomic_data$SPECIES_CD) # lookup table with all species
fish_occur <- getDomainOccurrence(fish_2016_2022_FL_all_2, species = every_fish_sp$species, group = every_fish_sp) # calculate domain occurrence for all species
fish_occur_greater1 <- fish_occur %>% # Select only fish species that have occurrence greater than 1
  filter(occurrence > 0.01) %>% 
  dplyr::select(GROUP) %>%
  distinct()

# Remove cryptic fish species and species with <1% occurrence from taxonomic list
fish_occur_greater1$SPECIES_CD <- fish_occur_greater1$GROUP
fish_2016_2022_FL_all_2$taxonomic_data <- fish_2016_2022_FL_all_2$taxonomic_data %>%
  semi_join(fish_occur_greater1, by = "SPECIES_CD") %>%
  filter(!FAMILY %in% c("Blenniidae","Chaenopsidae","Labrisomidae","Gobiidae"))
tax_dat_rev <- fish_2016_2022_FL_all_2$taxonomic_data

# Remove cryptic fish species and species with <1% occurrence from sample data
filter_fish_sp <- data.frame(SPECIES_CD = c(fish_2016_2022_FL_all_2$taxonomic_data$SPECIES_CD))
fish_2016_2022_FL_all_2$sample_data <- filter(fish_2016_2022_FL_all_2$sample_data, SPECIES_CD %in% filter_fish_sp$SPECIES_CD)

# Add in trophic guild information by taxa from Parravincini et al. 2020 guild assignments
P_guilds <- as.data.frame(unclass(read_xlsx("Parravincini_trophic_guilds.xlsx")),stringsAsFactors = TRUE)
P_guilds$SCINAME <- P_guilds$species
miss_spp1 <- tax_dat_rev %>% # Find species with missing trophic guild assignments
  anti_join(P_guilds, by = "SCINAME") # 39 missing spp

# Add in trophic guild information by taxa from Heidmann et al. 2024 guild assignments
H_guilds <- as.data.frame(unclass(read_xlsx("Heidmann_trophic_guilds.xlsx")),stringsAsFactors = TRUE)
H_guilds$SPECIES_CD <- H_guilds$code
miss_spp2 <- miss_spp1 %>% # Find species with missing trophic guild assignments
  anti_join(H_guilds, by = "SPECIES_CD")
H_guilds2 <- miss_spp1 %>%
  inner_join(H_guilds, by = "SPECIES_CD")
write.csv(miss_spp2,"Missing_species.csv")

# Assign any missing guilds
miss_spp3 <- as.data.frame(unclass(read_xlsx("Missing_species_assigned.xlsx")),stringsAsFactors = TRUE)

# Create lookup table with guild assignments from Heidmann, Parravincini, and other
guild_lookup1 <- tax_dat_rev %>% # Find species with missing trophic guild assignments
  inner_join(P_guilds, by = "SCINAME",suffix = (c("", ""))) %>%
  dplyr::select(SPECIES_CD,guild)

guild_lookup2 <- tax_dat_rev %>% # Find species with missing trophic guild assignments
  inner_join(H_guilds2, by = "SPECIES_CD",suffix = (c("", ""))) %>%
  dplyr::select(SPECIES_CD,guild)
guild_lookup2$guild <- fct_recode(guild_lookup2$guild, "piscivore" = "piscviore") # rename piscviore to piscivore

guild_lookup3 <- tax_dat_rev %>% # Find species with missing trophic guild assignments
  inner_join(miss_spp3, by = "SPECIES_CD",suffix = (c("", ""))) %>%
  dplyr::select(SPECIES_CD,guild)

guild_lookup4 <- rbind(guild_lookup1,guild_lookup2,guild_lookup3)

# Calculate biomass, density, abundance, and richness of all species at PSU level
all_fish_sp <- data.frame(species = c(fish_2016_2022_FL_all_2$taxonomic_data$SPECIES_CD),group = rep("All Species"))
all_fish_biomass <- getPSUBiomass(fish_2016_2022_FL_all_2, species = all_fish_sp$species, group = all_fish_sp)
all_fish_dens <- getPSUDensity(fish_2016_2022_FL_all_2, species = all_fish_sp$species, group = all_fish_sp)
all_fish_richness <- fish_2016_2022_FL_all_2$sample_data %>% group_by(PRIMARY_SAMPLE_UNIT,REGION,YEAR,STATION_NR) %>% summarize(RICHNESS = length(unique(SPECIES_CD[NUM > 0])))
all_fish_diversity1 <- fish_2016_2022_FL_all_2$sample_data %>% group_by(UNIQUE_ID2,SPECIES_CD) %>% summarize(species_count = sum(NUM), .groups = "drop") %>% complete(UNIQUE_ID2, SPECIES_CD, fill = list(species_count = 0)) %>% pivot_wider(names_from = SPECIES_CD, values_from = species_count) %>% unchop(everything()) 
all_fish_diversity2 <- all_fish_diversity1 %>% 
  mutate(SHANNON = diversity(all_fish_diversity1[,-1], index="shannon")) %>%
  dplyr::select(UNIQUE_ID2,SHANNON)

# Calculate biomass, density, abundance, and richness of fish guilds at PSU level
fish_2016_2022_FL_all_guild <- fish_2016_2022_FL_all_2$sample_data %>%
  left_join(guild_lookup4, by = "SPECIES_CD")
guild_fish_biomass <- getPSUBiomass(fish_2016_2022_FL_all_2, species = guild_lookup4$SPECIES_CD, group = guild_lookup4) %>% dplyr::select(GROUP,biomass,YEAR,REGION,PRIMARY_SAMPLE_UNIT) 
guild_fish_dens <- getPSUDensity(fish_2016_2022_FL_all_2, species = guild_lookup4$SPECIES_CD, group = guild_lookup4)
guild_fish_richness <- fish_2016_2022_FL_all_guild %>% group_by(PRIMARY_SAMPLE_UNIT,REGION,YEAR,STATION_NR,guild) %>% summarize(RICHNESS = length(unique(SPECIES_CD[NUM > 0]))) %>% mutate(GROUP = guild) %>% dplyr::select(-guild)

# Join dependent variables
# Total fish dependent variables
all_fish_dep_vars1 <- all_fish_biomass %>%
  inner_join(all_fish_dens, by = c("REGION","YEAR","PRIMARY_SAMPLE_UNIT"), suffix = c("", ".y")) %>% select(-ends_with(".y")) %>%
  inner_join(all_fish_richness, by = c("REGION","YEAR","PRIMARY_SAMPLE_UNIT"), suffix = c("", ".y")) %>% select(-ends_with(".y")) %>%
  mutate(UNIQUE_ID2 = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,STATION_NR,sep = "_")) %>%
  inner_join(all_fish_diversity2, by = "UNIQUE_ID2") %>%
  dplyr::select(-m,-GROUP,-var) %>%
  rename(.,c(BIOMASS = biomass,DENSITY = density))

# Guild dependent variables
all_fish_dep_vars2 <- guild_fish_biomass %>%
  inner_join(guild_fish_dens, by = c("REGION","YEAR","PRIMARY_SAMPLE_UNIT","GROUP"), suffix = c("", ".y")) %>% select(-ends_with(".y")) %>%
  inner_join(guild_fish_richness, by = c("REGION","YEAR","PRIMARY_SAMPLE_UNIT","GROUP"), suffix = c("", ".y")) %>% select(-ends_with(".y")) %>%
  mutate(UNIQUE_ID2 = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,STATION_NR,sep = "_"),
         GUILD = GROUP) %>%
  dplyr::select(UNIQUE_ID2,GUILD,biomass,density,RICHNESS) %>%
  rename(.,c(BIOMASS = biomass,DENSITY = density))

# Shorten guild names
guild_short <- data.frame(cbind(as.vector(unique(all_fish_dep_vars2$GUILD)), c("CORALL","CRUST","HMD","MACRO","MICRO","PISC","PLANK","SESS","INVERT")))
colnames(guild_short) <- c("GUILD","GUILD_SHORT")

all_fish_dep_vars3 <- all_fish_dep_vars2 %>%
  inner_join(guild_short, by = "GUILD") %>%
  select(-GUILD) %>%
  pivot_wider(names_from = GUILD_SHORT, values_from = c(BIOMASS,DENSITY,RICHNESS))

# Combine guild and all fish dep vars
all_fish_dep_vars4 <-  all_fish_dep_vars1 %>%
  left_join(all_fish_dep_vars3, by = "UNIQUE_ID2")

# Combine all fish dataset variables
fish_2016_2022_FL_all3 <- fish_2016_2022_FL_all_2$sample_data %>%
  dplyr::select(UNIQUE_ID2,MONTH,DAY,LAT_DEGREES,LON_DEGREES,DEPTH,MAPGRID_NR,HABITAT_CD,ZONE_NR,SUBREGION_NR,MPA_NR,RUGOSITY_CD,RUGOSITY_CLASS) %>% distinct

fish_2016_2022_FL_all_dep <- fish_2016_2022_FL_all3 %>%
  inner_join(all_fish_dep_vars4, by = "UNIQUE_ID2")

Calculate the independent variables

# Bring in the benthic fish datasets
DRTO_2016 <- as.data.frame(unclass(read_xlsx("DRTO_2016_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-HARD_REL_PCT_0:-HARD_REL_PCT_4) %>% mutate(REGION = rep("DRY TORT"))
DRTO_2018 <- as.data.frame(unclass(read_xlsx("DRTO_2018_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("DRY TORT"))
DRTO_2021 <- as.data.frame(unclass(read_xlsx("DRTO_2021_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("DRY TORT"),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT))
DRTO_2023 <- as.data.frame(unclass(read_xlsx("DRTO_2023_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("DRY TORT"),YEAR = rep(2022),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT)) # Reassign 2023 to 2022
FKEYS_2016 <- as.data.frame(unclass(read_xlsx("FKEYS_2016_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-HARD_REL_PCT_0:-HARD_REL_PCT_4) %>% mutate(REGION = rep("FLA KEYS"),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT))
FKEYS_2018 <- as.data.frame(unclass(read_xlsx("FKEYS_2018_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("FLA KEYS"))
FKEYS_2022 <- as.data.frame(unclass(read_xlsx("FKEYS_2022_Habitat.xlsx")),stringsAsFactors = TRUE) %>% mutate(REGION = rep("FLA KEYS"),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT))
SEFCRI_2016 <- as.data.frame(unclass(read_xlsx("SEFCRI_2016_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("SEFCRI"))
SEFCRI_2018 <- as.data.frame(unclass(read_xlsx("SEFCRI_2018_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("SEFCRI"))
SEFCRI_2020 <- as.data.frame(unclass(read_xlsx("SEFCRI_2020_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("SEFCRI"),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT))
SEFCRI_2021 <- as.data.frame(unclass(read_xlsx("SEFCRI_2021_Habitat.xlsx")),stringsAsFactors = TRUE) %>% dplyr::select(-BOTTOM_TEMP) %>% mutate(REGION = rep("SEFCRI"),YEAR = rep(2020),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT)) # Reassign 2021 to 2020
SEFCRI_2022 <- as.data.frame(unclass(read_xlsx("SEFCRI_2022_Habitat.xlsx")),stringsAsFactors = TRUE)%>% mutate(REGION = rep("SEFCRI"),PRIMARY_SAMPLE_UNIT = as.character(PRIMARY_SAMPLE_UNIT))

# Reassign incorrectly labeled Primary Sample Units as U to H
SEFCRI_2016_2 <- SEFCRI_2016 %>% 
  mutate(PRIMARY_SAMPLE_UNIT = case_when((PRIMARY_SAMPLE_UNIT == "3254U" & STATION_NR == 2)~ "3254H",(PRIMARY_SAMPLE_UNIT == "3108U" & STATION_NR == 2)~ "3108H",(PRIMARY_SAMPLE_UNIT == "3119U" & STATION_NR == 2)~ "3119H",TRUE ~ PRIMARY_SAMPLE_UNIT))  

# Create unique id2
fish_hab <- rbind(DRTO_2016,DRTO_2018,DRTO_2021,DRTO_2023,FKEYS_2016,FKEYS_2018,FKEYS_2022,SEFCRI_2016_2,SEFCRI_2018,SEFCRI_2020,SEFCRI_2021,SEFCRI_2022) %>%
    mutate(UNIQUE_ID2 = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,STATION_NR,sep = "_")) %>%
  dplyr::select(-YEAR,-PRIMARY_SAMPLE_UNIT,-STATION_NR,-REGION)

# Combine with dependent fish vars
fish_vars_all <- fish_2016_2022_FL_all_dep %>%
  inner_join(fish_hab, by = "UNIQUE_ID2",suffix = c("_SAMPLE","_BENTHIC"))

# Bring in the benthic and demographic coral datasets
coral_benthic_2016_2022_FL <- coral_raw_2016_2022_FL # benthic dataset

coral_demo_raw_ALL <- NCRMP_demo_All_regions_years # demographic dataset
coral_demo_2016_2022_FL <- coral_demo_raw_ALL %>%
  filter(REGION %in% c("SEFCRI", "FLK", "Tortugas"), YEAR %in% 2016:2022)

# Revise dataset region labels and create unique IDs
coral_benthic_2016_2022_FL_1 <- coral_benthic_2016_2022_FL %>%
    mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
           UNIQUE_ID = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))

coral_demo_2016_2022_FL_1 <- coral_demo_2016_2022_FL %>%
    mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
           UNIQUE_ID = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))

# Remove unnecessary stations from the benthic and demo coral datasets
coral_benthic_2016_2022_FL_2 <- coral_benthic_2016_2022_FL_1 %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID") 

coral_demo_2016_2022_FL_2 <- coral_demo_2016_2022_FL_1 %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID") 

# Rugosity values check
wtd_rug_check <- coral_benthic_2016_2022_FL_2 %>%
  dplyr::select(UNIQUE_ID,WTD_RUG,MEAN_RUG) %>% distinct() 
missing_wtd_rug <- wtd_rug_check[!complete.cases(wtd_rug_check$WTD_RUG),] # 423 missing observations

wtd_rug_check2 <- coral_demo_2016_2022_FL_2 %>%
  dplyr::select(UNIQUE_ID,WTD_RUG,MEAN_RUG) %>% distinct() 
missing_wtd_rug2 <- wtd_rug_check[!complete.cases(wtd_rug_check$WTD_RUG),] # 423 missing observations

# Combine mean and weighted rugosity columns
rug_coral <- coral_demo_2016_2022_FL_2 %>%
  dplyr::select(UNIQUE_ID,WTD_RUG,MEAN_RUG) %>% 
  mutate(RUG_CORAL = coalesce(WTD_RUG,MEAN_RUG)) %>%
  group_by(UNIQUE_ID) %>%
  summarize(RUG_CORAL = mean(RUG_CORAL)) %>%
  dplyr::select(UNIQUE_ID,RUG_CORAL)

# Calculate percent cover by groups at each site for each region
SEFCRI_per_cvr <- NCRMP_calculate_cover(region = "SEFCRI",project = "NCRMP")
DRY_TORT_per_cvr <- NCRMP_calculate_cover(region = "Tortugas", project = "NCRMP")
FLA_KEYS_per_cvr <- NCRMP_calculate_cover(region = "FLK", project = "NCRMP")

# Combine and reformat percent cover dataset
all_per_cvr <- rbind(SEFCRI_per_cvr$percent_cover_site,DRY_TORT_per_cvr$percent_cover_site,FLA_KEYS_per_cvr$percent_cover_site)

all_per_cvr2 <- all_per_cvr %>%
  dplyr::select(c(YEAR,cover_group,Percent_Cvr,REGION,PRIMARY_SAMPLE_UNIT)) %>%
  pivot_wider(names_from = "cover_group",values_from = "Percent_Cvr") %>% 
  unchop(everything()) 

# Dry Tortugas values from 2021 were assigned to 2020, reassign year correctly
lookup_cover <- c(HARD_CORALS = 'HARD CORALS', SOFT_CORALS = 'SOFT CORALS', TURF_ALGAE = 'TURF ALGAE', RAMICRUSTA_SPP = 'RAMICRUSTA SPP.')
all_per_cvr3 <- all_per_cvr2 %>%
  mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
         PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[TUVW]"),
         YEAR = case_when((YEAR == 2020 & REGION == "DRY TORT")~ 2021, TRUE ~ YEAR),
        UNIQUE_ID = (paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))) %>% # Correct sampling year is 2021 for these samples, not 2020 in Dry 
        rename(.,all_of(lookup_cover)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")

# Calculate coral density at each site for each region
SEFCRI_density <- NCRMP_DRM_calculate_colony_density(region = "SEFCRI", project = "NCRMP_DRM")
DRY_TORT_density <- NCRMP_DRM_calculate_colony_density(region = "Tortugas", project = "NCRMP_DRM")
FLA_KEYS_density <- NCRMP_DRM_calculate_colony_density(region = "FLK", project = "NCRMP_DRM")

# Combine and reformat density dataset
all_density <- rbind(SEFCRI_density$density_site,DRY_TORT_density$density_site,FLA_KEYS_density$density_site)
all_density2 <- all_density %>%
  mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
    PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[TUVW]"),
    UNIQUE_ID = (paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))) %>%
    group_by(UNIQUE_ID) %>%
    summarize(CORAL_DENSITY = mean(DENSITY)) %>%
  dplyr::select(c(UNIQUE_ID,CORAL_DENSITY)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")

# Calculate coral richness and diversity at each site for each region
SEFCRI_richness_diversity <- NCRMP_DRM_calculate_species_richness_diversity(region = "SEFCRI", project = "NCRMP_DRM")
DRY_TORT_richness_diversity <- NCRMP_DRM_calculate_species_richness_diversity(region = "Tortugas", project = "NCRMP_DRM")
FLA_KEYS_richness_diversity <- NCRMP_DRM_calculate_species_richness_diversity(region = "FLK", project = "NCRMP_DRM")

# Combine and reformat richness dataset
all_richness <- rbind(SEFCRI_richness_diversity$richness_site,DRY_TORT_richness_diversity$richness_site,FLA_KEYS_richness_diversity$richness_site)
all_richness2 <- all_richness %>%
  mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
             PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[TUVW]"),
    UNIQUE_ID = (paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))) %>%
    group_by(UNIQUE_ID) %>%
    summarize(CORAL_RICHNESS = mean(SPP_RICHNESS)) %>%
  dplyr::select(c(UNIQUE_ID,CORAL_RICHNESS)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")


# Combine and reformat diversity dataset
all_diversity <- rbind(SEFCRI_richness_diversity$species_diversity_site,DRY_TORT_richness_diversity$species_diversity_site,FLA_KEYS_richness_diversity$species_diversity_site)
all_diversity2 <- all_diversity %>%
  mutate(REGION = case_when(REGION == "FLK" ~ "FLA KEYS",
                            REGION == "Tortugas" ~ "DRY TORT",
                            REGION == "SEFCRI" ~ "SEFCRI"),
             PRIMARY_SAMPLE_UNIT = str_remove_all(PRIMARY_SAMPLE_UNIT,"[TUVW]"),
    UNIQUE_ID = (paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT,sep = "_"))) %>%
    group_by(UNIQUE_ID) %>%
    summarize(CORAL_DIV_SHANNON = mean(Shannon)) %>%
  dplyr::select(c(UNIQUE_ID,CORAL_DIV_SHANNON)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")


# Calculate hardbottom, softbottom, and rubble
coral_benthic_2016_2022_FL_bottom_type <- coral_benthic_2016_2022_FL_2 %>%
  group_by(UNIQUE_ID,STRAT) %>%
  summarize(HARDBOTTOM = sum(HARDBOTTOM_P,na.rm = T),
            SOFTBOTTOM = sum(SOFTBOTTOM_P,na.rm = T),
            RUBBLE = sum(RUBBLE_P,na.rm = T)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")


coral_demo_2016_2022_FL_diam <- coral_demo_2016_2022_FL_2 %>%
  group_by(UNIQUE_ID) %>%
  summarize(MAX_DIAM = max(MAX_DIAMETER,na.rm = T),
            MEAN_DIAM = mean(MAX_DIAMETER,na.rm = T),
            SUM_DIAM = sum(MAX_DIAMETER,na.rm = T),
            MAX_PERP_DIAM = max(PERP_DIAMETER,na.rm = T),
            MEAN_PERP_DIAM = mean(PERP_DIAMETER,na.rm = T),
            SUM_PERP_DIAM = sum(PERP_DIAMETER,na.rm = T),
            MAX_HT = max(HEIGHT,na.rm = T),
            MEAN_HT = mean(HEIGHT,na.rm = T),
            SUM_HT = sum(HEIGHT,na.rm = T)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID")

# Identify rows with all NA values
coral_demo_na <- coral_demo_2016_2022_FL_diam %>%
  filter(is.na(MEAN_PERP_DIAM))

# Filter dataset to remove rows with all NA values
coral_demo_2016_2022_FL_diam2 <- coral_demo_2016_2022_FL_diam %>%
  anti_join(coral_demo_na, by = "UNIQUE_ID")

# Get min, max, and meters completed
coral_depths <- coral_benthic_2016_2022_FL_2 %>%
  dplyr::select(MIN_DEPTH,MAX_DEPTH,METERS_COMPLETED,UNIQUE_ID) %>%
  rename(.,c(MIN_DEPTH_CORAL = MIN_DEPTH, MAX_DEPTH_CORAL = MAX_DEPTH, METERS_CORAL = METERS_COMPLETED)) %>%
  semi_join(coral_2016_2022_FL_key_final, by = "UNIQUE_ID") %>% distinct()

# Combine all coral independent variables
coral_vars <- coral_benthic_2016_2022_FL_bottom_type %>% # 793
  left_join(all_per_cvr3, by = "UNIQUE_ID") %>% # 778
  left_join(coral_demo_2016_2022_FL_diam2, by = "UNIQUE_ID") %>% # 777
  left_join(all_density2, by = "UNIQUE_ID") %>% # 777
  left_join(all_richness2, by = "UNIQUE_ID") %>% #759
  left_join(all_diversity2, by = "UNIQUE_ID") %>% # 750
  left_join(rug_coral, by = "UNIQUE_ID") %>%
  left_join(coral_depths, by = "UNIQUE_ID")

# Combine with original coral dataset
coral_vars_all <- coral_2016_2022_FL_key_final %>%
  left_join(coral_vars, by = "UNIQUE_ID") %>%
  dplyr::select(-PRIMARY_SAMPLE_UNIT,-YEAR,-REGION) %>%
  rename(.,STRAT_CORAL = STRAT)

Combine fish and coral datasets

# Combine fish and coral dataset variables
fish_vars_all2 <- fish_vars_all %>%
  mutate(PRIMARY_SAMPLE_UNIT_FISH = str_remove_all(PRIMARY_SAMPLE_UNIT,"[UH]"),
         UNIQUE_ID = paste(REGION,YEAR,PRIMARY_SAMPLE_UNIT_FISH,sep = "_")) %>%
  rename(.,c(MONTH_FISH = MONTH, DAY_FISH = DAY, LAT_DEGREES_FISH = LAT_DEGREES, LON_DEGREES_FISH = LON_DEGREES,PRIMARY_SAMPLE_UNIT_UH = PRIMARY_SAMPLE_UNIT, STATION_NR_FISH = STATION_NR, STRAT_FISH = STRAT, FISH_BIOMASS = BIOMASS, FISH_DENSITY = DENSITY, FISH_SHANNON = SHANNON, FISH_RICHNESS = RICHNESS))

fish_coral_dataset_final <- coral_vars_all %>%
  inner_join(fish_vars_all2, by = "UNIQUE_ID", suffix = c("","_FISH")) %>% as.data.frame() 

# Combine HABITAT_CD classes to reflect habitat type only, not depth or relief
NEW_HAB_TYPE1 <- as.data.frame(unclass(read_xlsx("Habitat_type.xlsx")),stringsAsFactors = TRUE)
fish_coral_dataset_final <- fish_coral_dataset_final %>%
  inner_join(NEW_HAB_TYPE1, by = "HABITAT_CD") %>%
  mutate(HABITAT = NEW_HABITAT_TYPE)

# Export dataset
fish_coral_dataset_final_csv = data.frame(lapply(fish_coral_dataset_final, as.character), stringsAsFactors=FALSE)
write.csv(fish_coral_dataset_final_csv,"Final_fish_coral_dataset.csv")

PERMANOVA

# Remove missing values
dat1 <- fish_coral_dataset_final
dat2 <- dat1[complete.cases(dat1$MAX_DIAM),] # find missing values

# PERMANOVA among regions
biomass.dist<-vegdist(dat2$FISH_BIOMASS, method='bray')
biomass.perm<-adonis2(biomass.dist~ dat2$REGION + dat2$YEAR + dat2$LON_DEGREES_FISH*dat2$LAT_DEGREES_FISH, data=dat2, permutations=9999)
biomass.perm

# adonis2(formula = biomass.dist ~ dat3$REGION + dat3$YEAR + dat3$LON_DEGREES_FISH * dat3$LAT_DEGREES_FISH, data = dat3, 
# permutations = 9999)
#                                              Df SumOfSqs      R2       F Pr(>F)    
# dat3$REGION                                   2    4.708 0.04480 16.6406 0.0001 ***
# dat3$YEAR                                     1    0.393 0.00374  2.7795 0.0514 .  
# dat3$LON_DEGREES_FISH                         1    0.746 0.00710  5.2724 0.0033 ** 
# dat3$LAT_DEGREES_FISH                         1    0.389 0.00370  2.7499 0.0527 .  
# dat3$LON_DEGREES_FISH:dat3$LAT_DEGREES_FISH   1    0.533 0.00508  3.7700 0.0173 *  
# Residual                                    695   98.314 0.93558                   
# Total                                       701  105.084 1.00000 


biomass.dist<-vegdist(dat2$FISH_BIOMASS, method='bray')
biomass.perm2<-adonis2(biomass.dist~ dat2$REGION + dat2$YEAR + dat2$LON_DEGREES_FISH*dat2$LAT_DEGREES_FISH + dat2$HABITAT + dat2$PROT + dat2$SUBREGION_NR, data=dat2, permutations=9999)
biomass.perm2

# adonis2(formula = biomass.dist ~ dat3$REGION + dat3$YEAR + dat3$LON_DEGREES_FISH * dat3$LAT_DEGREES_FISH + dat3$HABITAT + dat3$PROT + dat3$SUBREGION_NR, data = dat3, permutations = 999)
#                                              Df SumOfSqs      R2       F Pr(>F)    
# dat3$REGION                                   2    4.708 0.04480 17.3091  0.001 ***
# dat3$YEAR                                     1    0.393 0.00374  2.8912  0.038 *  
# dat3$LON_DEGREES_FISH                         1    0.746 0.00710  5.4842  0.002 ** 
# dat3$LAT_DEGREES_FISH                         1    0.389 0.00370  2.8603  0.053 .  
# dat3$HABITAT                                  8    3.970 0.03778  3.6493  0.001 ***
# dat3$PROT                                     1    0.274 0.00260  2.0126  0.107    
# dat3$SUBREGION_NR                             1    1.152 0.01096  8.4696  0.001 ***
# dat3$LON_DEGREES_FISH:dat3$LAT_DEGREES_FISH   1    0.295 0.00280  2.1658  0.107    
# Residual                                    685   93.157 0.88651                   
# Total                                       701  105.084 1.00000   


density.dist<-vegdist(dat2$FISH_DENSITY, method='bray')
density.perm<-adonis2(density.dist~ dat2$REGION + dat2$YEAR + dat2$LON_DEGREES_FISH*dat2$LAT_DEGREES_FISH + dat2$HABITAT + dat2$PROT + dat2$SUBREGION_NR, data=dat2, permutations=9999)
density.perm

# adonis2(formula = density.dist ~ dat3$REGION + dat3$YEAR + dat3$LON_DEGREES_FISH * dat3$LAT_DEGREES_FISH + dat3$HABITAT + dat3$PROT + dat3$SUBREGION_NR, data = dat3, permutations = 9999)
#                                              Df SumOfSqs      R2       F Pr(>F)    
# dat3$REGION                                   2    0.574 0.00995  3.8668 0.0053 ** 
# dat3$YEAR                                     1    0.094 0.00163  1.2663 0.2643    
# dat3$LON_DEGREES_FISH                         1    0.265 0.00459  3.5688 0.0328 *  
# dat3$LAT_DEGREES_FISH                         1    0.809 0.01402 10.8952 0.0002 ***
# dat3$HABITAT                                  8    4.193 0.07266  7.0586 0.0001 ***
# dat3$PROT                                     1    0.082 0.00142  1.1026 0.3091    
# dat3$SUBREGION_NR                             1    0.612 0.01060  8.2403 0.0009 ***
# dat3$LON_DEGREES_FISH:dat3$LAT_DEGREES_FISH   1    0.217 0.00376  2.9227 0.0514 .  
# Residual                                    685   50.859 0.88137                   
# Total                                       701   57.704 1.00000 

richness.dist<-vegdist(dat2$FISH_RICHNESS, method='bray')
richness.perm<-adonis2(richness.dist~ dat2$REGION + dat2$YEAR + dat2$LON_DEGREES_FISH*dat2$LAT_DEGREES_FISH + dat2$HABITAT + dat2$PROT + dat2$SUBREGION_NR, data=dat2, permutations=9999)
richness.perm

# adonis2(formula = richness.dist ~ dat3$REGION + dat3$YEAR + dat3$LON_DEGREES_FISH * dat3$LAT_DEGREES_FISH + dat3$HABITAT + dat3$PROT + dat3$SUBREGION_NR, data = dat3, permutations = 9999)
#                                              Df SumOfSqs      R2       F Pr(>F)    
# dat3$REGION                                   2   0.5586 0.03370 13.9902 0.0001 ***
# dat3$YEAR                                     1   0.0227 0.00137  1.1381 0.2877    
# dat3$LON_DEGREES_FISH                         1   0.0318 0.00192  1.5905 0.1959    
# dat3$LAT_DEGREES_FISH                         1   0.1258 0.00759  6.3022 0.0063 ** 
# dat3$HABITAT                                  8   1.8341 0.11063 11.4837 0.0001 ***
# dat3$PROT                                     1   0.1876 0.01131  9.3952 0.0012 ** 
# dat3$SUBREGION_NR                             1   0.0644 0.00388  3.2257 0.0569 .  
# dat3$LON_DEGREES_FISH:dat3$LAT_DEGREES_FISH   1   0.0775 0.00468  3.8837 0.0342 *  
# Residual                                    685  13.6758 0.82492                   
# Total                                       701  16.5784 1.00000 


shannon.dist<-vegdist(dat2$FISH_SHANNON, method='bray')
shannon.perm<-adonis2(shannon.dist~ dat2$REGION + dat2$YEAR + dat2$LON_DEGREES_FISH*dat2$LAT_DEGREES_FISH + dat2$HABITAT + dat2$PROT + dat2$SUBREGION_NR, data=dat2, permutations=9999)
shannon.perm

# adonis2(formula = shannon.dist ~ dat3$REGION + dat3$YEAR + dat3$LON_DEGREES_FISH * dat3$LAT_DEGREES_FISH + dat3$HABITAT + dat3$PROT + dat3$SUBREGION_NR, data = dat3, permutations = 9999)
#                                              Df SumOfSqs      R2      F Pr(>F)    
# dat3$REGION                                   2   0.0898 0.01228 4.5380 0.0059 ** 
# dat3$YEAR                                     1   0.0030 0.00041 0.3027 0.6795    
# dat3$LON_DEGREES_FISH                         1   0.0235 0.00322 2.3765 0.1136    
# dat3$LAT_DEGREES_FISH                         1   0.0066 0.00091 0.6695 0.4523    
# dat3$HABITAT                                  8   0.3549 0.04852 4.4811 0.0003 ***
# dat3$PROT                                     1   0.0214 0.00293 2.1636 0.1247    
# dat3$SUBREGION_NR                             1   0.0214 0.00292 2.1597 0.1275    
# dat3$LON_DEGREES_FISH:dat3$LAT_DEGREES_FISH   1   0.0119 0.00162 1.1973 0.2778    
# Residual                                    685   6.7811 0.92719                  
# Total                                       701   7.3137 1.00000

FSS GAM

cont.preds=c("HARDBOTTOM","SOFTBOTTOM","RUBBLE","CCA","HARD_CORALS","MACROALGAE","SOFT_CORALS","SPONGES","TURF_ALGAE","RAMICRUSTA_SPP","PCT_SAND","PCT_HARD_BOTTOM","PCT_RUBBLE","PCT_CORAL","PCT_OCTO","PCT_SPONGE") # use as continuous predictors

# Plot of likely transformations
pdf(file="predictor_transformations.pdf",onefile=T)
par(mfrow=c(2,5))
for (i in cont.preds) {
  x<-dat2[ ,i]
  x = as.numeric(unlist(x))
  hist((x))#Looks best
  plot((x),main = paste(i))
  hist(sqrt(x))
  plot(sqrt(x))
  hist(log(x+1))
  plot(log(x+1))
  hist((x)^2)
  plot((x)^2)
  hist((x)^3)
  plot((x)^3)
}
dev.off()

cont.preds=c("HARDBOTTOM","SOFTBOTTOM","RUBBLE","CCA","HARD_CORALS","MACROALGAE","SOFT_CORALS","SPONGES","TURF_ALGAE","PCT_SAND","PCT_HARD_BOTTOM","PCT_RUBBLE","PCT_CORAL","PCT_OCTO","PCT_SPONGE") # use as continuous predictors

cor_vis <- cor(dat2[,c(cont.preds)])
corrplot(cor_vis)

# Tranform predictor variables
dat3 <- dat2 %>%
  mutate(HARDBOTTOM.X3 = (HARDBOTTOM)^3,
         SOFTBOTTOM.SQRT = sqrt(SOFTBOTTOM),
         RUBBLE.SQRT = sqrt(RUBBLE),
         CCA.LOG = log(CCA + 1),
         HARD_CORALS.LOG = log(HARD_CORALS + 1),
         MACROALGAE.SQRT = sqrt(MACROALGAE),
         SOFT_CORALS.LOG = log(SOFT_CORALS + 1),
         SPONGES.LOG = log(SPONGES + 1),
         TURF_ALGAE.SQRT = sqrt(TURF_ALGAE),
         RUG_CORAL.SQRT = sqrt(RUG_CORAL),
         MIN_DEPTH_CORAL.SQRT = sqrt(MIN_DEPTH_CORAL),
         MAX_DEPTH_CORAL.SQRT = sqrt(MAX_DEPTH_CORAL),
         DEPTH_SAMPLE.SQRT = sqrt(DEPTH_SAMPLE),
         DEPTH_BENTHIC.SQRT = sqrt(DEPTH_BENTHIC),
         MAX_HARD_RELIEF.LOG = log(MAX_HARD_RELIEF + 1),
         MAX_SOFT_RELIEF.LOG = log(MAX_SOFT_RELIEF + 1),
         AVG_HARD_RELIEF.LOG = log(MAX_SOFT_RELIEF + 1),
         PCT_SAND.SQRT = sqrt(PCT_SAND),
         PCT_HARD_BOTTOM.X3 = (PCT_HARD_BOTTOM)^3,
         PCT_RUBBLE.SQRT = sqrt(RUBBLE),
         PCT_CORAL.LOG = log(PCT_CORAL + 1),
         PCT_OCTO.SQRT = sqrt(PCT_OCTO),
         PCT_SPONGE.LOG = log(PCT_SPONGE + 1),
         MAX_DIAM.LOG = log(MAX_DIAM + 1),
         MEAN_DIAM.LOG = log(MEAN_DIAM + 1),
         SUM_DIAM.LOG = log(SUM_DIAM + 1),
         MAX_PERP_DIAM.LOG = log(MAX_PERP_DIAM + 1),
         MEAN_PERP_DIAM.LOG = log(MEAN_PERP_DIAM + 1),
         SUM_PERP_DIAM.LOG = log(SUM_PERP_DIAM + 1),
         MAX_HT.LOG = log(MAX_HT + 1),
         MEAN_HT.LOG = log(MEAN_HT + 1),
         SUM_HT.LOG = log(SUM_HT + 1),
         CORAL_DENSITY.SQRT = sqrt(CORAL_DENSITY),
         CORAL_RICHNESS.SQRT = sqrt(CORAL_RICHNESS))

cont.preds=c("HARDBOTTOM.X3","SOFTBOTTOM.SQRT","RUBBLE.SQRT","CCA.LOG","HARD_CORALS.LOG","MACROALGAE.SQRT","SOFT_CORALS.LOG","SPONGES.LOG","TURF_ALGAE.SQRT","PCT_SAND.SQRT","PCT_HARD_BOTTOM.X3","PCT_RUBBLE.SQRT","PCT_CORAL.LOG","PCT_OCTO.SQRT","PCT_SPONGE.LOG") # use as continuous predictors

# FSS GAM with bottom AND coral variables, separate analyses for each region and for all regions combined
null.vars=c("YEAR","LAT_DEGREES_FISH","LON_DEGREES_FISH") # use null model
cont.preds=c("PCT_HARD_BOTTOM.X3","PCT_RUBBLE.SQRT","PCT_CORAL.LOG","PCT_OCTO.SQRT","PCT_SPONGE.LOG","MAX_HARD_RELIEF.LOG","AVG_HARD_RELIEF.LOG","MAX_SOFT_RELIEF.LOG","SUM_DIAM.LOG","MEAN_DIAM.LOG","MAX_HT.LOG","RUG_CORAL.SQRT", "CORAL_DENSITY.SQRT","CORAL_RICHNESS.SQRT","CORAL_DIV_SHANNON","DEPTH_SAMPLE.SQRT") # remove highly correlated vars before running this test

############### ALL REGIONS COMBINED #######################################
# Model fish richness using poisson family for all regions combined
cat.preds=c("HABITAT","SUBREGION_NR","PROT","REGION") # Don't use ZONE_NR b/c too many missing values
resp.vars = c("FISH_RICHNESS","RICHNESS_CORALL","RICHNESS_CRUST","RICHNESS_HMD","RICHNESS_MACRO","RICHNESS_MICRO","RICHNESS_PISC","RICHNESS_PLANK","RICHNESS_SESS","RICHNESS_INVERT")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_richness.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(SUM_DIAM.LOG,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='re',k=5), family=poisson(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='re',k=5)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.richness=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_richness.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_richness.csv")
write.csv(model.set$predictor.correlations,"preds_cors_coral_vars_richness.csv")

# Fit the model for biomass, density, and shannon diversity using tweedie family for all regions combined
resp.vars = c("FISH_BIOMASS","FISH_DENSITY","FISH_SHANNON","BIOMASS_CORALL","BIOMASS_CRUST","BIOMASS_HMD","BIOMASS_MACRO","BIOMASS_MICRO","BIOMASS_PISC","BIOMASS_PLANK","BIOMASS_SESS","BIOMASS_INVERT","DENSITY_CORALL","DENSITY_CRUST","DENSITY_HMD","DENSITY_MACRO","DENSITY_MICRO","DENSITY_PISC","DENSITY_PLANK","DENSITY_SESS","DENSITY_INVERT")
pdf(file="mod_fits_coral_vars_others.pdf",onefile=T)
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_others.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(SUM_DIAM.LOG,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='re',k=5), family=tw(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='re',k=5)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.others=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_others.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_others.csv")
write.csv(model.set$predictor.correlations,"preds_cors_coral_vars_others.csv")

all.var.imp <- rbind(all.var.imp.others,all.var.imp.richness)
pdf(file="var_imp_hmap_cor_&_bot_vars.pdf",height=15,width=21,pointsize=8)
heatmap.2(all.var.imp,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(20,15), lhei=c(3,10),lwid=c(3,10),
                     Rowv=FALSE,Colv=FALSE)
dev.off()



############### DRY TORTUGAS ONLY #######################################
# Model fish richness using poisson family for all regions combined
cat.preds=c("HABITAT","PROT") # Don't use REGION 
dat3_DT <- dat3 %>% filter(REGION == "DRY TORT")
resp.vars = c("FISH_RICHNESS","RICHNESS_CORALL","RICHNESS_CRUST","RICHNESS_HMD","RICHNESS_MACRO","RICHNESS_MICRO","RICHNESS_PISC","RICHNESS_PLANK","RICHNESS_SESS","RICHNESS_INVERT")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_richness_DRY_TORT1.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_DT[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4), family=poisson(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.richness.DT=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_richness_DRY_TORT.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_richness_DRY_TORT.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_richness_DRY_TORT.csv")

# Fit the model for biomass, density, and shannon diversity using tweedie family for DRY TORTUGAS
resp.vars = c("FISH_BIOMASS","BIOMASS_CORALL","BIOMASS_CRUST","BIOMASS_HMD","BIOMASS_MACRO","BIOMASS_MICRO","BIOMASS_PISC","BIOMASS_PLANK","BIOMASS_SESS","BIOMASS_INVERT","FISH_DENSITY","DENSITY_CORALL","DENSITY_CRUST","DENSITY_HMD","DENSITY_MACRO","DENSITY_MICRO","DENSITY_PISC","DENSITY_PLANK","DENSITY_SESS","DENSITY_INVERT","FISH_SHANNON")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_others_DRY_TORT.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_DT[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4), family=tw(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.others.DT=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_others_DRY_TORT1.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_others_DRY_TORT1.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_others_DRY_TORT1.csv")


all.var.imp.DT <- rbind(all.var.imp.others.DT,all.var.imp.richness.DT)
pdf(file="var_imp_hmap_cor_&_bot_vars_DRY_TORT1.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.DT,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()

rows <- c("FISH_BIOMASS","FISH_DENSITY","FISH_RICHNESS","FISH_SHANNON") 
all.var.imp.DT.1 <- all.var.imp.DT [rownames(all.var.imp.DT) %in% rows, ]  

pdf(file="var_imp_hmap_cor_&_bot_vars_DRY_TORT2.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.DT.1,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()

rows <- c("FISH_BIOMASS","FISH_DENSITY","FISH_RICHNESS","FISH_SHANNON") 
all.var.imp.DT.1 <- all.var.imp.DT [rownames(all.var.imp.DT) %in% rows, ]  

pdf(file="var_imp_hmap_cor_&_bot_vars_DRY_TORT2.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.DT.1,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()



############### FLORIDA KEYS ONLY #######################################
# Model fish richness using poisson family for all regions combined
cat.preds=c("HABITAT","PROT") # Don't use REGION 
dat3_FK <- dat3 %>% filter(REGION == "FLA KEYS")
resp.vars = c("FISH_RICHNESS","RICHNESS_CORALL","RICHNESS_CRUST","RICHNESS_HMD","RICHNESS_MACRO","RICHNESS_MICRO","RICHNESS_PISC","RICHNESS_PLANK","RICHNESS_SESS","RICHNESS_INVERT")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_richness_FLA_KEYS.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_FK[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=3), family=poisson(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=3)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.richness.FK=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_richness_FLA_KEYS.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_richness_FLA_KEYS.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_richness_FLA_KEYS.csv")

# Fit the model for biomass, density, and shannon diversity using tweedie family for FLA KEYS
resp.vars = c("FISH_BIOMASS","BIOMASS_CORALL","BIOMASS_CRUST","BIOMASS_HMD","BIOMASS_MACRO","BIOMASS_MICRO","BIOMASS_PISC","BIOMASS_PLANK","BIOMASS_SESS","BIOMASS_INVERT","FISH_DENSITY","DENSITY_CORALL","DENSITY_CRUST","DENSITY_HMD","DENSITY_MACRO","DENSITY_MICRO","DENSITY_PISC","DENSITY_PLANK","DENSITY_SESS","DENSITY_INVERT","FISH_SHANNON")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_others_FLA_KEYS.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_FK[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=3), family=tw(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=3)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.others.FK=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_others_FLA_KEYS.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_others_FLA_KEYS.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_others_FLA_KEYS.csv")


all.var.imp.FK <- rbind(all.var.imp.others.FK,all.var.imp.richness.FK)
pdf(file="var_imp_hmap_cor_&_bot_vars_FLA_KEYS1.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.FK,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()



rows <- c("FISH_BIOMASS","FISH_DENSITY","FISH_RICHNESS","FISH_SHANNON") 
all.var.imp.FK.1 <- all.var.imp.FK [rownames(all.var.imp.FK) %in% rows, ]  

pdf(file="var_imp_hmap_cor_&_bot_vars_FLA_KEYS2.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.FK.1,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()


############### SOUTH FLORIDA ONLY #######################################
# Model fish richness using poisson family for SOUTH FLORIDA
cat.preds=c("HABITAT") # Don't use PROT b/c only 1 level in SF
dat3_SF <- dat3 %>% filter(REGION == "SEFCRI")
resp.vars = c("FISH_RICHNESS","RICHNESS_CORALL","RICHNESS_CRUST","RICHNESS_HMD","RICHNESS_MACRO","RICHNESS_MICRO","RICHNESS_PISC","RICHNESS_PLANK","RICHNESS_SESS","RICHNESS_INVERT")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_richness_SEFCRI.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_SF[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4), family=poisson(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.richness.SF=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_richness_SEFCRI.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_richness_SEFCRI.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_richness_SEFCRI.csv")

# Fit the model for biomass, density, and shannon diversity using tweedie family for SOUTH FLORIDA
resp.vars = c("FISH_BIOMASS","BIOMASS_CORALL","BIOMASS_CRUST","BIOMASS_HMD","BIOMASS_MACRO","BIOMASS_MICRO","BIOMASS_PISC","BIOMASS_PLANK","BIOMASS_SESS","BIOMASS_INVERT","FISH_DENSITY","DENSITY_CORALL","DENSITY_CRUST","DENSITY_HMD","DENSITY_MACRO","DENSITY_MICRO","DENSITY_PISC","DENSITY_PLANK","DENSITY_SESS","DENSITY_INVERT","FISH_SHANNON")
i=1
out.all=list()
var.imp=list()
fss.all=list()
top.all=list()
pdf(file="mod_fits_cor_&_bot_vars_others_SEFCRI.pdf",onefile=T)
for(i in 1:length(resp.vars)){
 use.dat=na.omit(dat3_SF[,c(null.vars,cont.preds,cat.preds,resp.vars[i])])
 use.dat$response=use.dat[,resp.vars[i]]
 Model1=gam(response~s(PCT_HARD_BOTTOM.X3,k=5,bs='cr') + te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4), family=tw(), data=use.dat)
 
 model.set=generate.model.set(use.dat=use.dat,max.predictors=2,   # limit size here because null model already complex
                             test.fit=Model1,k=5,
                             pred.vars.cont=cont.preds,
                             pred.vars.fact=cat.preds,
                             null.terms="te(LAT_DEGREES_FISH,LON_DEGREES_FISH,bs='ps',k=5) + s(YEAR,bs='cr',k=4)")

 out.list=fit.model.set(model.set, parallel = TRUE)
 fss.all=c(fss.all,list(out.list))
 mod.table=out.list$mod.data.out
 mod.table=mod.table[order(mod.table$AICc),]
 out.i=mod.table
 out.all=c(out.all,list(out.i))
 var.imp=c(var.imp,list(out.list$variable.importance$aic$variable.weights.raw))
 all.less.2AICc=mod.table[which(mod.table$delta.AICc<2),]
 top.all=c(top.all,list(all.less.2AICc))

 # plot the all best models
 par(oma=c(1,1,4,1))
 for(r in 1:nrow(all.less.2AICc)){
 best.model.name=as.character(all.less.2AICc$modname[r])
 best.model=out.list$success.models[[best.model.name]]
 if(best.model.name!="null"){
  plot(best.model,all.terms=T,pages=1,residuals=F,pch=16)
  mtext(side=3,text=resp.vars[i],outer=T)}
 }
}
dev.off()

names(out.all)=resp.vars
names(var.imp)=resp.vars
names(top.all)=resp.vars
names(fss.all)=resp.vars

all.mod.fits=do.call("rbind",out.all)
all.var.imp.others.SF=do.call("rbind",var.imp)
top.mod.fits=do.call("rbind",top.all)

write.csv(all.mod.fits[,-2],"all_mod_cor_&_bot_vars_others_SEFCRI.csv")
write.csv(top.mod.fits[,-2],"top_mod_cor_&_bot_vars_others_SEFCRI.csv")
write.csv(model.set$predictor.correlations,"preds_cors_cor_&_bot_vars_others_SEFCRI.csv")

all.var.imp.SF <- rbind(all.var.imp.others.SF,all.var.imp.richness.SF)
pdf(file="var_imp_hmap_cor_&_bot_vars_SEFCRI1.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.SF,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()


rows <- c("FISH_BIOMASS","FISH_DENSITY","FISH_RICHNESS","FISH_SHANNON") 
all.var.imp.SF.1 <- all.var.imp.SF [rownames(all.var.imp.SF) %in% rows, ]  

pdf(file="var_imp_hmap_cor_&_bot_vars_SEFCRI2.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.SF.1,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()

all.var.imp.SF.3 <- cbind(all.var.imp.SF.1,PROT = 0)
all.var.imp.1 <- rbind(all.var.imp.DT.1,all.var.imp.FK.1,all.var.imp.SF.3)

pdf(file="var_imp_hmap_cor_&_bot_vars_all_regions.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.1,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()


all.var.imp.2 <- all.var.imp.1[c(1,5,9,2,6,10,3,7,11,4,8,12),]
pdf(file="var_imp_hmap_cor_&_bot_vars_all_regions2.pdf",height=15,width=21,pointsize=10)
heatmap.2(all.var.imp.2,notecex=0.4,  dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=1,
                     notecol="black",key=T,
                     sepcolor = "black",margins=c(15,20), lhei=c(2,10),lwid=c(2,10),
                     Rowv=FALSE,Colv=FALSE,
          cexRow = 0.2 + 1/log10(nrow(all.var.imp)),
           cexCol = 0.5 + 1/log10(ncol(all.var.imp)),
          srtCol=45,  adjCol = c(1,1)   )
dev.off()

# extract the best model
mod.table=mod.table[order(mod.table$AIC),]
best.model=out.list$success.models[[as.character(mod.table$modname[2])]]
plot(best.model,all.terms=T,pages=1,rug=TRUE,residuals=FALSE)
gam.check(best.model)
summary(best.model)

# Response variables
for(r in 1:length(resp.vars)){
 par(mfrow=c(2,1))
hist(dat3[,resp.vars[r]],main=resp.vars[r])
 plot(jitter(dat3[,resp.vars[r]])) }

# Continuous variables
for(p in 1:length(cont.preds)){
par(mfrow=c(2,1))
 hist(dat3[,cont.preds[p]],main=cont.preds[p])
 plot(jitter(dat3[,cont.preds[p]]))}

# Variable importance
heatmap.2(all.var.imp, dendrogram ="none",
                     col=colorRampPalette(c("white","yellow","orange","red"))(30),
                     trace="none",key.title = "",keysize=2,
                     notecol="black",key=T,
                     sepcolor = "black",
                     Rowv=FALSE,Colv=FALSE)

Citations

citation("geosphere")
# Add to citation list
# Parravicini et al. 2020
# Heidmann et al. 2024

Hijmans R (2022). geosphere: Spherical Trigonometry. R package version 1.5-18, https://CRAN.R-project.org/package=geosphere.