Reef Visual Census Background

Reef fish community data used in this study were collected as part of the multi-agency Reef Visual Census (RVC) Brandt et al., 2009. Fish communities were visually surveyed underwater annually in the mainland FKNMS (Lower Keys, Middle Keys and Upper Keys) from 1999 to 2012, and biennially from 2012 to 2016. The Dry Tortugas region was surveyed from 1999 to 2000 and biennially from 2004 to 2016.

The RVC uses a stationary point count method within a randomly selected 7.5 m radius circular plot with a 200m map grid from 1999 to 2012 and 100m map grid from 2014 to 2016 to optimize the observation of conspicuous and diurnally active reef fish, specifically economically and ecologically important reef fish species Bohnsack and Bannerot, 1986.

Objective

To analyze and compare changes in species richness, Simpson diversity, Shannon diversity and functional diversity in the Florida Keys National Marine Sanctuary (FKNMS) to reveal changes in temporal and spatial variability from 1999 – 2016. The indices were assessed throughout the Florida Keys, between four distinct regions of the FKNMS (Upper Keys, Middle Keys, Lower Keys, and Dry Tortugas) and for different levels of management (Ecological Reserves (ER), Sanctuary Preservations Areas (SPA), and Special Use/Research Only Areas (SU/RO)).

Diversity indicse as effective number of species

All indices were computed as effective number of species. Effective number of species is the number of equally abundance species needed to produce the observed value of a diversity index (Jost, 2006; Jost et al., 2010; MacArthur, 1965). Jost, 2006 refers to these as true measures of diversity, where prior to the conversion the diversity indices are simply measures of entropy.

Outputs

  1. Functional dendogram
  2. Species weighted abundance matrix by domain for each sampling year and region
    • Species richness, Simpson diversity, Shannon diversity, and Functional diversity as effective number of species for each sampling year by domain
    • plots
  3. Species weighted abundance matrix by strata for each sampling year and region
    • Species richness, Simpson diversity, Shannon diversity, and Functional diversity as effective number of species for each sampling year by strata and region
    • plots
  4. Species weighted abundance matrix by primary sampling unit for each sampling year and region
    • Species richness, Simpson diversity, Shannon diversity, and Functional diversity as effective number of species for each sampling year by primary sampling unit
    • plots
  5. Environmental factors from SERC
fk1999 <- getRvcData(1999, "FLA KEYS", server = 'https://www.sefsc.noaa.gov/rvc_analysis20/') 

spp_list <- fk1999$taxonomic_data$COMNAME
sp_list <- as.data.frame(spp_list, row.names = F)
write_csv(sp_list, "spp_list.csv") 

trial <- getDomainAbundance(fk1999, "ocy chry", merge_protected = F ) # returns: YEAR, REGION, SPECIES_CD, density, var, n, nm, N, NM, protected_status

trial2 <- getDomainDensity(fk1999, "ocy chry", merge_protected = F ) # returns: YEAR, REGION, SPECIES_CD, density, var, n, nm, N, NM, protected_status

#getDomainDensity(fk1999, "ocy chry", is_protected = T) # returns: YEAR, REGION, SPECIES_CD, density, var, n, nm, N, NM for only protected areas 

#getDomainDensity(fk1999, "ocy chry", when_present = T)

abun1999 <- getDomainAbundance(fk1999, spp_list, merge_protected = F)

write_csv(abun1999, 'abunfk1999.csv') #save domain abundance as csv 

div_1999_tbl = abun1999 %>%
  select(YEAR, REGION, SPECIES_CD, abundance, protected_status) %>%
  nest(-YEAR) %>% 
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    dom_richness = map(
      data_wide, 
      function(x) specnumber(x %>% select(-REGION, protected_status))),
    dom_simpson = map(
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-REGION, -protected_status), index = 'simpson'))),
    dom_shannon = map( 
      data_wide,
      function(x) exp(diversity(x %>% select(-REGION, -protected_status), index = 'shannon'))))
##Percent of the abundance data is from species identified to the genus? 

domain_fk_abun <- read_csv('big_csv/abundance_domain/domain_fk_abun.csv', 
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  abundance = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

spp_abun = domain_fk_abun %>%
  filter(stringr::str_detect(SPECIES_CD, 'SPE\\.$')) %>%
  #sum(.$abundance, na.rm=T)
  .$abundance %>% 
  sum() #1923525258 

total_abun = sum(domain_fk_abun$abundance) #23,434,984,377 counts over 15 years in FK

percent_spp = (spp_abun/total_abun)*100 

Functional distance trait based matrix (from Lefcheck et al.,)

#rvc_grp = read_csv('data/rvc_spp_grouped.csv')

# wide: common_name x traits
d_traits = rvc_grp %>% 
  group_by(
    common_name, maxlength, trophic_level, trophic_group, water_column, diel_activity, substrate_type, complexity, gregariousness) %>%
  summarize(
    n = n()) %>%
  select(-n) %>%
  ungroup() %>%
  mutate(
    # ordinal traits
    complexity = factor(
      complexity, levels=c("Low","Medium","High"), ordered=T)) %>%
  arrange(common_name) %>%
  as.data.frame()

#dim(d_traits) 333, 8 (species*traits)

# conform to: species x traits
rownames(d_traits) = d_traits$common_name
d_traits = select(d_traits, -common_name)
head(d_traits)

#Calculate Gower distances
traits.dist=gowdis(d_traits,ord="podani")

#Use clustering method to produce ultrametric dendrogramBecause values of Rao's Q can be maximized when fewer than the max number of functional types are present unless distances are ultramtetric 

#to account for sensitivity in clustering use multiple algorithms  (Mouchet et al., 2008) 
tree_methods = c("single","complete","average","mcquitty","ward.D") #average is best clustering method 
trees=lapply(tree_methods,function(i) hclust(traits.dist, method=i))
#par(mfrow=c(3,2))
#for(i in 1:length(trees)) {plot(trees[[i]])}

#convert trees to ultrametric
trees.ultra=lapply(trees,function(i) cl_ultrametric(as.hclust(i)))

#Plot each tree
par(mfrow=c(3,2))
for (i in 1:length(trees.ultra)) {plot(trees.ultra[[i]])}

#Build the consensus tree (Mouchet et al 2008 Oikos) from package clue 
ensemble.trees=cl_ensemble(list=trees) #list of clusterings 
class(ensemble.trees)
consensus.tree=cl_consensus(ensemble.trees) #synthesizes the information in the elements of a cluster ensemble into a single clustering 
#plot(consensus.tree)

#Calculate dissimilarity values for each tree using 2-norm (Merigot et al 2010 Ecology) to determine which tree best preserves orignial distances
all.trees=c(trees.ultra,consensus.tree[1])
names(all.trees)=c(tree_methods,"consensus")
trees.dissim=lapply(all.trees,function(i) cl_dissimilarity(i,traits.dist,method="spectral")) #spectral norm (2-norm) of the differences of the ultrametrics

#Identify best tree and isolate
trees.dissim2=do.call(rbind,trees.dissim)
min.tree=which.min(trees.dissim2)
names(all.trees)[min.tree]
func.dist=all.trees[names(all.trees)==names(all.trees)[min.tree]][[1]]

#Confirm lowest 2-norm value
cl_dissimilarity(func.dist,traits.dist,method="spectral")

#Scale by the max value so that all values are between 0-1 (clue package)
func.dist=func.dist/max(func.dist)

#Plot the best tree
par(mfrow=c(1,1))
par(mar=c(3,1,0,16))
plot(func.dist,horiz=TRUE)
#Save plot: 10" x 15"

#Write newick tree
write.tree(as.phylo(as.hclust(func.dist)),"data/dendro_functional.nwk")

#Visualize species' differences in multivariate trait space
#Perform k-means clustering with no a priori specification for k
traits.kclus=pamk(traits.dist,krange=2:10)
# #Perform multidimensional scaling on functional dendrogram
traits_nmds=metaMDS(traits.dist,k=traits.kclus$nc,trymax=500)
# #Plot in two dimensions
par(mar=c(4,4,1,1))
ordiplot(traits_nmds,type="n")
#Assign colors to different groups
groups=levels(factor(traits.kclus$pamobject$clustering))
points.symbols=15:16
points.colors=c("firebrick3","cornflowerblue")
for(i in seq_along(groups)) {
   points(traits_nmds$points[traits.kclus$pamobject$clustering==groups[i],],
          pch=points.symbols[i],col=points.colors[i],cex=1.4) }
 ordispider(traits_nmds,factor(traits_kclus$pamobject$clustering),label=F)
 ordihull(traits_nmds,factor(traits.kclus$pamobject$clustering),lty="dotted")
 orditorp(traits_nmds,dis="sites",pcex=0,air=0.5,col="grey10",cex=0.8)

Trait matrix visualization [http://jkunst.com/r/pokemon-visualize-em-all/]

Fetch the RVC data from NOAA’s Southeast Fisheries Science Center server using the RVC package

Sample Data Variables:

  • PRIMARY_SAMPLE_UNIT: A code indicating the primary sample unit in which a sample was collected.
  • YEAR: A number indicating the calendar year.
  • MONTH: A number indicating the month of the year.
  • DAY: A number indicating the day of the month (EST).
  • STATION_NR: A number indicating the secondary sampling unit within a given primary sample unit.
  • LAT_DEGREES: Latitude of secondary sampling unit in decimal degrees).
  • LON_DEGREES: Longitude of secondary sampling unit in decimal degrees).
  • DEPTH: Average depth, in meters, of secondary sampling unit).
  • UNDERWATER_VISIBILITY: visibility, in meters, at secondary sampling unit.
  • MAPGRID_NR: A number indicating the primary sample unit.
  • HABITAT_CD: A code indicating the habitat type.
  • ZONE_NR: A code indicating the distance offshore:
    • 1 - Inshore
    • 2 - Midchannel
    • 3 - Offshore
    • 4 - Fore-reef
  • SUBREGION_NR: A number indicating the subregion.
  • MPA_NR: A number identifying the marine protected area in which the sample was collected. Zero indicates unprotected status)
  • SPECIES_NR: A number indicating the species for a sample
  • SPECIES_CD: A code indicating the species for a sample. Consists of the first three letters of the generic name and the first four of the specific name
  • LEN: The length, in cm, of a sample.
  • NUM: The number of individuals of a given species and length observed in a secondary sampling unit
  • TIME_SEEN: A number indicating when, during sampling, an individual was observed. 1: In the first five minutes, 2: From 5-10 minutes, 3: After 10 minutes.
  • PROT: A boolean value indicating whether a sample was in a protected area or not
    • 1 - protected area
    • 0 - not protected
  • STRAT: A code indicating the stratum in which a sample was taken. Differs by region.
    • FMLR
    • FSLR
    • HRRF
    • INPR
    • MCPR
    • OFPR
  • REGION: A code indicating the region in which a sample was taken.
    • FLA KEYS (florida keys)
    • DRY TORT (dry tortugas)

Stratum Data Variables

  • REGION
  • YEAR
  • PROT
  • STRAT
  • NTOT: The number of possible primary sample units for a given year, region, stratum, and protected status
  • GRID_SIZE: The length (in meters) to a side of a primary sample unit for a given year, region, stratum, and protected status.

Taxonomic Data Variables

  • SPECIES_CD
  • FAMILY
  • SCINAME
  • COMNAME
  • LC: Minimum length at capture, in centimeters
  • LM: Median length at maturity, in centimeters.
  • WLEN_A: The linear coefficient of the allometric growth equation in grams per millimeter (g/mm)
  • WLEN_B: The exponential coefficient of the allometric growth equation

Benthic Data Variables:

  • REGION: A code indicating the region.
  • YEAR: A number indicating the calendar year.
  • PRIMARY_SAMPLE_UNIT: A code indicating the primary sample unit in which a sample was collected.
  • STATION_NR: A number indicating the secondary sampling unit within a given primary sample unit.
  • DEPTH: Average depth, in meters, of secondary sampling unit.
  • MAX_HARD_RELIEF: The maximum height, in meters, of hard relief (e.g. hard corals, rock).
  • MAX_SOFT_RELIEF: The maximum height, in meters, of soft relief (e.g. soft corals, sponges).
  • AVG_HARD_RELIEF: The average height, in meters, of hard relief.
  • HARD_REL_PCT_0: Percentage of hard relief less than 0.2 meters.
  • HARD_REL_PCT_1: Percentage of hard relief between 0.2 and 0.5 meters.
  • HARD_REL_PCT_2: Percentage of hard relief between 0.5 and 1.0 meters.
  • HARD_REL_PCT_3: Percentage of hard relief between 1.0 and 1.5 meters.
  • HARD_REL_PCT_4: Percentage of hard relief greater than 1.5 meters.
  • PCT_SAND: Percentage of abiotic cover that is sand.
  • PCT_HARD_BOTTOM: Percentage of abiotic cover that is hard bottom.
  • PCT_RUBBLE: Percentage of abiotic cover that is rubble.
  • PCT_CORAL: Percentage of biotic hardbottom that is coral.
  • PCT_OCTO: Percentage of biotic hardbottom that is octocoral.
  • PCT_SPONGE: Percentage of biotic hardbottom that is sponge.

Function: getDomainDensity returns:

  • YEAR
  • REGION
  • SPECIES_CD
  • density
  • var: Variance in average density per secondary sampling unit
  • n: Number of primary sampling units sampled
  • nm: Number of secondary sampling units sampled
  • N: Number of possible primary sample units
  • NM: Number of possible secondary sampling units
  • length_class: The length class or bin. Only present if length_bins is not NULL. The notation, [lower, upper), is inclusive of the lower bound, but exclusive of the upper bound
  • protected_status: The protected status. Only present if merge_protected is FALSE

Biodiversity metrics computed with weighted species abundance

Florida Keys domain

#variables: YEAR, REGION SPECIES_CD, abundance, var, n, nm, N, NM, protected_status

domain_fk_abun <- read_csv('big_csv/abundance_domain/domain_fk_abun.csv', 
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  abundance = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

##Percent of the abundance data is from species identified to the genus? 

## FK's domain abundance 
fk_domain_abun_diversity = domain_fk_abun %>% #15,771 * 10
  select(YEAR, protected_status, SPECIES_CD, abundance) %>% #27,911 * 4 
  group_by(YEAR, protected_status) %>% ##45 groups (fk 15*3)
  nest(-YEAR) %>%  
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    dom_richness = map( #species richness 
      data_wide, #why are there 116 rows??? 
      function(x) specnumber(x)),
    dom_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    dom_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(dom_richness, dom_simpson, dom_shannon)

fk_domain_abun_diversity = fk_domain_abun_diversity %>%
  select(dom_richness,dom_simpson,dom_shannon)

write_csv(fk_domain_abun_diversity, 'big_csv/abundance_domain/domain_fk_abun_diversity')

## TO DO: compute SD for each year, Confidence Interval, statistical differences between 
domain_fk_abun_diversity = read_csv('big_csv/abundance_domain/domain_fk_abun_diversity.csv')
## Parsed with column specification:
## cols(
##   YEAR = col_integer(),
##   protected_status = col_character(),
##   dom_richness = col_integer(),
##   dom_simpson = col_double(),
##   dom_shannon = col_double()
## )
## Simpson for protected and unprotected stacked 
fk_simpson = domain_fk_abun_diversity %>% 
  filter(protected_status != 'all') %>%
  select(YEAR, protected_status, dom_simpson) %>%
  mutate(
    protected_status = recode(
      protected_status, '0'='Unprotected', '1'='Protected')) %>%
  spread(protected_status, dom_simpson)

dygraph(fk_simpson, main = "Simpson Reef Fish Diversity") %>% 
  dyAxis("y", label = "Effective Number of Species", valueRange = c(0, 50)) %>%
  dyAxis("x", label = "Year") %>%
  dyOptions(stackedGraph = T)%>% 
  dyRangeSelector(height = 20) 
## Shannon for protected and unprotected stacked 
fk_shannon = domain_fk_abun_diversity %>% 
  filter(protected_status != 'all') %>%
  select(YEAR, protected_status, dom_shannon) %>%
  mutate(
    protected_status = recode(
      protected_status, '0'='Unprotected', '1'='Protected')) %>%
  spread(protected_status, dom_shannon)

dygraph(fk_shannon, main = "Shannon Reef Fish Diversity") %>% 
  dyAxis("y", label = "Effective Number of Species", valueRange = c(0, 75)) %>%
  dyAxis("x", label = "Year") %>%
  dyOptions(stackedGraph = T)%>% 
  dyRangeSelector(height = 20) 
d = domain_fk_abun_diversity %>% 
  filter(protected_status != 0) %>% 
  filter(protected_status != 1)

#Simpson for "all" filled 
dygraph_simp = d %>%
  select(YEAR, dom_simpson)

dygraph(dygraph_simp, main = "Simpson Diversity of Reef Fish in FKNMS") %>%
  dyOptions(fillGraph = T, fillAlpha = 0.4, drawPoints = T, pointSize = 2) %>%
  dyAxis("y", label = "Effective Number of Species") %>% 
  dyAxis("x", label = "Year")
## Simpson and Shannon stacked for "all"
dygraph_simp_shannon = d %>%
  select(YEAR, dom_simpson, dom_shannon)

dygraph(dygraph_simp_shannon, main = "Florida Keys Reef Fish Biodiversity") %>% 
  dyAxis("y", label = "Effective Number of Species", valueRange = c(0, 70)) %>%
  dyAxis("x", label = "Year") %>%
  dySeries("dom_shannon", label = "Shannon Diversity", color = "blue") %>% 
  dySeries("dom_simpson", label = "Simpson Diversity", color = "red") %>% 
  dyOptions(stackedGraph = T, fillAlpha = 0.6, axisLineWidth = 1.5) %>% #drawGrid = F
  dyLegend(width = 400)
#Richness, Shannon, Simpson stacked for "all"
dygraph_rich_simp_shannon = d %>%
  select(YEAR, dom_simpson, dom_shannon, dom_richness)

dygraph(dygraph_rich_simp_shannon, main = "Florida Keys Reef Fish Biodiversity") %>% 
  dyAxis("y", label = "Effective Number of Species") %>% 
  dyAxis("x", label = "Year") %>%
  dySeries("dom_richness", label = "Richness", color = "purple") %>%
  dySeries("dom_shannon", label = "Shannon", color = "blue") %>% 
  dySeries("dom_simpson", label = "Simpson", color = "red") %>% 
  dyOptions(stackedGraph = T, fillAlpha = 0.6, axisLineWidth = 1.5) %>% #drawGrid = F
  dyLegend(width = 400)
domain_fk_abun_diversity = read_csv('big_csv/abundance_domain/domain_fk_abun_diversity.csv')
## Parsed with column specification:
## cols(
##   YEAR = col_integer(),
##   protected_status = col_character(),
##   dom_richness = col_integer(),
##   dom_simpson = col_double(),
##   dom_shannon = col_double()
## )
# Parsed with column specification:
# cols(
#   YEAR = col_integer(),
#   protected_status = col_character(),
#   dom_richness = col_integer(),
#   dom_simpson = col_double(),
#   dom_shannon = col_double()
# )
protected_status_names <- c(
  "0" = "Unprotected",
  "1" = "Protected",
  "all" = "All"
  )
#richness, simpson, shannon for domain FKNMS  
FK_abun_RShSim <- ggplot(data = fk_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_richness, color = "dom_richness")) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon")) +
  geom_line(aes(y = dom_richness, color = "dom_richness")) +
  geom_line(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_line(aes(y = dom_shannon, color = "dom_shannon")) +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Diversity indices") +
  scale_colour_manual(values = c("darkturquoise", "magenta", "blue"), labels=c("Species Richness", "Shannon Diversity", "Simpson Diversity")) +
  ggtitle("Reef Fish Biodiversity in the FKNMS") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(breaks = c(1999:2016))+
  scale_y_continuous(limits= c(0, 300), breaks = c(0,25,50,75,100,125,150,175,200,225,250,275,300))


#simpson, shannon by protected status for domain FKNMS
FK_abun_ShSim <-ggplot(data = fk_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon")) +
  geom_line(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_line(aes(y = dom_shannon, color = "dom_shannon")) +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Diversity index") +
  scale_colour_manual(values = c("magenta", "blue"), labels=c("Shannon diversity", "Simpson Diversity")) +
  ggtitle("Reef Fish Biodiversity in the Florida Keys") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 250), breaks = c(5,10,15,20,25,30,35,40,45,50))

#species richness by protected status for domain FKNMS
FK_abun_Rich <- ggplot(data = fk_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_richness, color = "dom_richness"),color = "darkturquoise") +
  geom_line(aes(y = dom_richness, color = "dom_richness"), color = "darkturquoise") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  ggtitle("Species Richness of Reef Fish in the Florida Keys") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 250), breaks = c(0,50, 100, 150, 200, 250, 300))
## wonky 

#shannon by protected status for domain FKNMS
FK_abun_Shan <- ggplot(data = fk_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon"),color = "magenta") +
  geom_line(aes(y = dom_shannon, color = "dom_shannon"), color = "magenta") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  #scale_colour_manual(labels=c("Unprotected", "Protected", "All")) +
  ggtitle("Shannon Diveristy of Reef Fish in the Florida Keys") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 50), breaks = c(5,10,15,20,25,30,35,40,45,50))

#simpson by protected status for domain FKNMS
FK_abun_Simp <- ggplot(data = fk_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson"),color = "blue") +
  geom_line(aes(y = dom_simpson, color = "dom_simpson"), color = "blue") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  #scale_colour_manual(labels=c("Unprotected", "Protected", "All")) +
  ggtitle("Simpson Diveristy of Reef Fish in the Florida Keys") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 25), breaks = c(5,10,15,20,25))

Dry Tortugas domain

domain_dt_abun <-
  read_csv('big_csv/abundance_domain/domain_dt_abun.csv',
    col_types = cols(
      YEAR = col_integer(),
      REGION = col_character(),
      SPECIES_CD = col_character(),
      abundance = col_double(),
      var = col_double(),
      n = col_integer(),
      nm = col_integer(),
      N = col_double(),
      NM = col_double(),
      protected_status = col_character()
      ))

#protected status: 0 - not protected; 1 - protected; all - both 

dt_domain_abun_diversity = domain_dt_abun %>% #12,140 * 10
  select(YEAR, protected_status, SPECIES_CD, abundance) %>% #12,140 * 4 
  group_by(YEAR, protected_status) %>% ##36 groups (fk 9*4)
  nest(-YEAR) %>%  
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    dom_richness = map( #species richness 
      data_wide, #why are there 116 rows??? 
      function(x) specnumber(x)),
    dom_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    dom_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(dom_richness, dom_simpson, dom_shannon)

domain_dt_abun_diversity = dt_domain_abun_diversity %>%
  select(YEAR, protected_status, dom_richness, dom_simpson, dom_shannon) 
write_csv(domain_dt_abun_diversity, 'big_csv/abundance_domain/domain_dt_abun_diversity.csv')


#TODO: why does DT have 4 options underprotected status?? 0,1,2,all 
domain_dt_abun_diversity = read_csv("big_csv/abundance_domain/domain_dt_abun_diversity.csv")
## Parsed with column specification:
## cols(
##   YEAR = col_integer(),
##   protected_status = col_character(),
##   dom_richness = col_integer(),
##   dom_simpson = col_double(),
##   dom_shannon = col_double()
## )
dt_protected_status_names <- c(
  "0" = "0 - Unprotected",
  "1" = "1 - Protected?",
  "2" = "2 - Protected?",
  "all" = "All"
  )

#richness, simpson, shannon for domain FKNMS  
ggplot(data = dt_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_richness, color = "dom_richness")) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon")) +
  geom_line(aes(y = dom_richness, color = "dom_richness")) +
  geom_line(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_line(aes(y = dom_shannon, color = "dom_shannon")) +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Diversity indices") +
  scale_colour_manual(values = c("darkturquoise", "magenta", "blue"), labels=c("Species Richness", "Shannon Diversity", "Simpson Diversity")) +
  ggtitle("Reef Fish Biodiversity in Dry Tortugas") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(breaks = c(1999:2016))+
  scale_y_continuous(limits= c(0, 300), breaks = c(0,25,50,75,100,125,150,175,200,225,250,275,300))

#simpson, shannon by protected status for domain FKNMS
ggplot(data = dt_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon")) +
  geom_line(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_line(aes(y = dom_shannon, color = "dom_shannon")) +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Diversity index") +
  scale_colour_manual(values = c("magenta", "blue"), labels=c("Shannon diversity", "Simpson Diversity")) +
  ggtitle("Reef Fish Biodiversity in Dry Tortugas") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 50), breaks = c(5,10,15,20,25,30,35,40,45,50))

#species richness by protected status for domain FKNMS
ggplot(data = dt_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_richness, color = "dom_richness"),color = "darkturquoise") +
  geom_line(aes(y = dom_richness, color = "dom_richness"), color = "darkturquoise") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  ggtitle("Species Richness of Reef Fish in Dry Tortugas") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 250), breaks = c(0,50, 100, 150, 200, 250, 300))
## Warning: Removed 1 rows containing missing values (geom_point).

## wonky 

#shannon by protected status for domain FKNMS
ggplot(data = dt_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon"),color = "magenta") +
  geom_line(aes(y = dom_shannon, color = "dom_shannon"), color = "magenta") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  #scale_colour_manual(labels=c("Unprotected", "Protected", "All")) +
  ggtitle("Shannon Diveristy of Reef Fish in Dry Tortugas") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 50), breaks = c(5,10,15,20,25,30,35,40,45,50))

#simpson by protected status for domain FKNMS
ggplot(data = dt_domain_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson"),color = "blue") +
  geom_line(aes(y = dom_simpson, color = "dom_simpson"), color = "blue") +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  #scale_colour_manual(labels=c("Unprotected", "Protected", "All")) +
  ggtitle("Simpson Diveristy of Reef Fish in Dry Tortugas") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 25), breaks = c(5,10,15,20,25))

Florida Keys strata’s:

  1. FMLR - Forereef Midchannel Linear Reef
  2. FSLR - Forereef Shallow Linear Reef
  3. HRRF - High Relief Reef (Spur and Groove)
  4. INPR - Inshore patch reef
  5. MCPR - Midchannel Patch Reef
  6. OFPR - Offshore Patch Reef
  7. FDLR - Forereef Deep Linear Reef
## FK's abundance by strata 
strata_fk_abun <- read_csv("big_csv/abundance_strata/strata_fk_abun.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  STRAT = col_character(),
  SPECIES_CD = col_character(),
  abundance = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

fk_strata_abun_diversity = strata_fk_abun %>% #133,762 * 12
  filter(protected_status!='all') %>% #66,881 * 12
  select(YEAR, STRAT, protected_status, SPECIES_CD, abundance) %>% #66,881 * 5
  group_by(YEAR, STRAT, protected_status) %>% #294 groups (15 years *7 strata *3prot = 315)??
  nest() %>%  # 294 * 4 
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    richness = map( #species richness
      data_wide, 
      function(x) specnumber(x)),
    simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(richness, simpson, shannon)

fk_strata_abun_diversity = fk_strata_abun_diversity %>%
  select(YEAR, STRAT, protected_status, richness, simpson, shannon) 
write_csv(fk_strata_abun_diversity, 'big_csv/abundance_strata/strata_fk_abun_diversity.csv')
strata_fk_abun_diversity = read_csv("big_csv/abundance_strata/strata_fk_abun_diversity.csv")

# strata_names <- c(
#  "FMLR" = "Forereef Midchannel Linear Reef",
#  "FSLR" = "Forereef Shallow Linear Reef",
#  "HRRF" = "High Relief Reef", #(Spur and Groove)
#  "INPR" = "Inshore patch reef",
#  "MCPR" = "Midchannel Patch Reef",
#  "OFPR" = "Offshore Patch Reef",
#  "FDLR" = "Forereef Deep Linear Reef")

strata_names <- c(
 "FMLR" = "FMLR",
 "FSLR" = "FSLR",
 "HRRF" = "HRRF", #(Spur and Groove)
 "INPR" = "INPR",
 "MCPR" = "MCPR",
 "OFPR" = "OFPR",
 "FDLR" = "FDLR")

#simpson by protected status for domain FKNMS
FK_strata_ab_Sim <- ggplot(data = fk_strata_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = strata_simpson, color = "strata_simpson"),color = "blue") +
  geom_line(aes(y = strata_simpson, color = "strata_simpson"), color = "blue") +
  facet_grid(STRAT ~ protected_status, 
             labeller = labeller(.rows = strata_names, .cols = protected_status_names)) +
             #labeller(strata_names = label_wrap_gen(3))) + 
  labs(x = "Year", y = "Effective Number of Species") +
  ggtitle("Simpson Diveristy of Reef Fish in the Florida Keys") + 
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12)) +
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 25), breaks = c(5,10,15,20,25))

ggplot(data = fk_strata_abun_diversity, aes(x = YEAR)) +
  geom_point(aes(y = strata_shannon, color = "strata_shannon"), color = "magenta") +
  geom_line(aes(y = strata_shannon, color = "strata_shannon"), color = "magenta") +
  facet_grid(STRAT ~ protected_status, labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Level of protection") +
  ggtitle("Shannon Diveristy of Reef Fish in the Florida Keys by Strata") + 
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 25), breaks = c(5,10,15,20,25))

Dry Tortugas strata’s:

  1. CONT_LR (continuous low relief)
  2. CONT_MR (continuous mid relief)
  3. CONT_HR (continuous high relief)
  4. ISOL_LR (isolated low relief)
  5. ISOL_MR (isolated mid relief)
  6. ISOL_HR (isolated high relief)
  7. SPGR_LR (spur and groove low relief)
  8. SPGR_HR (spur and groove high relief)
## DT's abundance by strata 
strata_dt_abun <- read_csv('big_csv/abundance_strata/strata_dt_abun.csv')
# Parsed with column specification:
# cols(
#   YEAR = col_integer(),
#   REGION = col_character(),
#   STRAT = col_character(),
#   PROT = col_integer(),
#   SPECIES_CD = col_character(),
#   abundance = col_double(),
#   var = col_double(),
#   n = col_integer(),
#   nm = col_integer(),
#   N = col_double(),
#   NM = col_double()
# )

dt_strata_abun_diversity = strata_dt_abun %>% #133,762 * 12
  group_by(YEAR, STRAT, PROT) %>% #150 groups 
  nest(-YEAR) %>%
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    strata_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-REGION, -var, -n, -nm, -N, -NM))),
    strata_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-REGION, -var, -n, -nm, -N, -NM), index = 'simpson'))),
    strata_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-REGION, -var, -n, -nm, -N, -NM), index = 'shannon')))) %>%
  unnest(strata_richness, strata_simpson, strata_shannon)

strata_dt_abun_diversity = dt_strata_abun_diversity %>%
  select(YEAR, STRAT, PROT, strata_richness, strata_simpson, strata_shannon)
write_csv(strata_dt_abun_diversity, "big_csv/abundance_strata/strata_dt_abun_diversity.csv")
strata_dt_abun_diversity = read_csv("big_csv/abundance_strata/strata_dt_abun_diversity.csv")
## Parsed with column specification:
## cols(
##   YEAR = col_integer(),
##   STRAT = col_character(),
##   PROT = col_integer(),
##   strata_richness = col_integer(),
##   strata_simpson = col_double(),
##   strata_shannon = col_double()
## )

Florida Keys PSU

psu_fk_abun <- read_csv("big_csv/abundance/abundance_psu/psu_fk_abun.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  STRAT = col_character(),
  PROT = col_integer(),
  PRIMARY_SAMPLE_UNIT = col_character(),
  SPECIES_CD = col_character(),
  m = col_integer(),
  var = col_double(),
  abundance = col_double(),
  protected_status = col_character()))

fk_psu_abun_diversity = psu_fk_abun %>% #3,134,710 × 10
  group_by(YEAR, PRIMARY_SAMPLE_UNIT, protected_status)%>% #8,814 groups 
  nest(-YEAR) %>%
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    psu_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-REGION, -STRAT, - PROT, -m, -var))),
    psu_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-REGION, -STRAT, - PROT, -m, -var), index = 'simpson'))),
    psu_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-REGION, -STRAT, - PROT, -m, -var), index = 'shannon')))) %>% 
  unnest(psu_richness,psu_simpson, psu_shannon)
 
psu_fk_abun_diversity = fk_psu_abun_diversity %>%
  select(YEAR, PRIMARY_SAMPLE_UNIT, protected_status,psu_richness, psu_simpson, psu_shannon)%>% 
unnest(psu_richness,psu_simpson, psu_shannon) %>% 
write_csv(psu_fk_abun_diversity, "big_csv/abundance_psu/psu_fk_abun_diversity.csv") 
psu_fk_abun_diversity = read_csv("big_csv/abundance_psu/psu_fk_abun_diversity.csv", 
          col_types = cols(
            YEAR = col_integer(),
            PRIMARY_SAMPLE_UNIT = col_character(),
            protected_status = col_character(),
            psu_richness = col_integer(),
            psu_simpson = col_double(),
            psu_shannon = col_double()))

Dry Tortugas PSU

dt_psu_abun <- read_csv("big_csv/abundance_psu/psu_dt_abun.csv")
# Parsed with column specification:
# cols(
#   YEAR = col_integer(),
#   REGION = col_character(),
#   STRAT = col_character(),
#   PROT = col_integer(),
#   PRIMARY_SAMPLE_UNIT = col_character(),
#   SPECIES_CD = col_character(),
#   m = col_integer(),
#   var = col_double(),
#   abundance = col_double()
# )

dt_psu_abun_diversity = dt_psu_abun %>% #934,402 * 9 
  select(YEAR, STRAT, PRIMARY_SAMPLE_UNIT, PROT, SPECIES_CD, abundance) %>% 
  group_by(YEAR, PRIMARY_SAMPLE_UNIT)%>% #2,702 groups 
  nest(-YEAR) %>% #
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, abundance, fill =0)), 
    psu_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-STRAT, -PROT))),
    psu_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-STRAT, -PROT), index = 'simpson'))),
    psu_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-STRAT, - PROT), index = 'shannon')))) %>% 
  unnest(psu_richness,psu_simpson, psu_shannon)
 
psu_dt_abun_diversity = dt_psu_abun_diversity %>%
  select(YEAR, PRIMARY_SAMPLE_UNIT, psu_richness, psu_simpson, psu_shannon)

write_csv(psu_dt_abun_diversity, "big_csv/abundance_psu/psu_dt_abun_diversity.csv") 
psu_dt_abun_diversity = read_csv("big_csv/abundance_psu/psu_dt_abun_diversity.csv")
## Parsed with column specification:
## cols(
##   YEAR = col_integer(),
##   PRIMARY_SAMPLE_UNIT = col_character(),
##   psu_richness = col_integer(),
##   psu_simpson = col_double(),
##   psu_shannon = col_double()
## )

Biodiversity metrics by computed with weighted species density

Florida Keys domain

domain_fk_density <- read_csv("big_csv/density_domain/domain_fk_density.csv", 
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

## FK's domain density 
domain_fk_density = domain_fk_density %>% #15,771 * 10
  select(YEAR, protected_status, SPECIES_CD, density) %>% #27,911 * 4 
  group_by(YEAR, protected_status) %>% ##45 groups (fk 15*3)
  nest(-YEAR) %>%  
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    dom_richness = map( #species richness 
      data_wide, #why are there 116 rows??? 
      function(x) specnumber(x)),
    dom_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    dom_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(dom_richness, dom_simpson, dom_shannon)

domain_fk_density_diversity = domain_fk_density %>%
  select(YEAR, protected_status, dom_richness, dom_simpson, dom_shannon) 

write_csv(domain_fk_density_diversity, 'big_csv/density_domain/domain_fk_density_diversity.csv')
#simpson, shannon by protected status for domain FKNMS
ggplot(data = domain_fk_density, aes(x = YEAR)) +
  geom_point(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_point(aes(y = dom_shannon, color = "dom_shannon")) +
  geom_line(aes(y = dom_simpson, color = "dom_simpson")) +
  geom_line(aes(y = dom_shannon, color = "dom_shannon")) +
  facet_grid(protected_status ~ ., labeller = as_labeller(protected_status_names)) +
  labs(x = "Year", y = "Effective Number of Species", color = "Diversity index") +
  scale_colour_manual(values = c("magenta", "blue"), labels=c("Shannon diversity", "Simpson Diversity")) +
  ggtitle("Reef Fish Biodiversity in the Florida Keys") +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12))+
  scale_x_continuous(breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 50), breaks = c(5,10,15,20,25,30,35,40,45,50))

Dry Tortugas domain

domain_dt_density <- read_csv("big_csv/density_domain/domain_dt_density.csv", 
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

#protected status: 0 - not protected; 1 - protected; all - both 

domain_dt_density = domain_dt_density %>% #12,140 * 10
  select(YEAR, protected_status, SPECIES_CD, density()) %>% #12,140 * 4 
  group_by(YEAR, protected_status) %>% ##36 groups (fk 9*4)
  nest(-YEAR) %>%  
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    dom_richness = map( #species richness 
      data_wide, #why are there 116 rows??? 
      function(x) specnumber(x)),
    dom_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    dom_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(dom_richness, dom_simpson, dom_shannon)

domain_dt_density_diversity = domain_dt_density %>%
  select(YEAR, protected_status, dom_richness, dom_simpson, dom_shannon) 

write_csv(domain_dt_density_diversity, 'big_csv/density_domain/domain_dt_density_diversity.csv')

#TODO: why does DT have 4 options underprotected status?? 0,1,2,all 

Florida Keys strata

## FK's density by strata 
strata_fkdensity_1999_2016 <- read_csv("big_csv/density_strata/strata_fk_density.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

fk_strata_density_diversity = strata_fkdensity_1999_2016 %>% #133,762 * 12
  filter(protected_status!='all') %>% #66,881 * 12
  select(YEAR, STRAT, protected_status, SPECIES_CD, density) %>% #66,881 * 5
  group_by(YEAR, STRAT, protected_status) %>% #294 groups (15 years *7 strata *3prot = 315)??
  nest() %>%
   mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    strata_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x)),
    strata_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x, index = 'simpson'))),
    strata_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x, index = 'shannon')))) %>%
  unnest(strata_richness, strata_simpson, strata_shannon)
ggplot(data = fk_strata_density_diversity, aes(x = YEAR)) +
  geom_point(aes(y = strata_simpson, color = "strata_simpson"),color = "blue") +
  geom_line(aes(y = strata_simpson, color = "strata_simpson"), color = "blue") +
  facet_grid(STRAT ~ protected_status, 
             labeller = labeller(.rows = strata_names, .cols = protected_status_names)) +
             #labeller(strata_names = label_wrap_gen(3))) + 
  labs(x = "Year", y = "Effective Number of Species") +
  ggtitle("Simpson Diveristy of Reef Fish in the Florida Keys") + 
  theme(plot.title = element_text(hjust = 0.5, face = 'bold', size = 12)) +
  scale_x_continuous(limits = c(1999, 2016), breaks = c(1999:2016)) +
  scale_y_continuous(limits= c(0, 25), breaks = c(5,10,15,20,25))

Dry Tortugas strata

## DT's density by strata 
strata_dtdensity_1999_2016 <- read_csv("big_csv/density_strata/strata_dtdensity_1999_2016.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

dt_strata_diversity = strata_dtdensity_1999_2016 %>% #133,762 * 12
  group_by(YEAR, STRAT, protected_status) %>% #294 groups 
  nest(-YEAR) %>%
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    strata_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-PROT, -var, -n, -nm, -N, -NM))),
    strata_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-PROT, -var, -n, -nm, -N, -NM), index = 'simpson'))),
    strata_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-PROT, -var, -n, -nm, -N, -NM), index = 'shannon')))) %>%
  unnest(strata_richness, strata_simpson, strata_shannon)

strata_dt_density_diversity = dt_strata_diversity %>%
  select(YEAR, STRAT, protected_status, strata_richness, strata_simpson, strata_shannon)

write_csv(strata_dt_density_diversity, "density_strata/strata_dt_density_diversity")

Florida Keys PSU

psu_fkdensity_1999_2016 <- read_csv("big_csv/density_psu/psu_fkdensity_1999_2016.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

fk_strata_diversity = strata_fkdensity_1999_2016 %>%
  group_by(YEAR, Primary_Sampling_Unit, protected_status)%>% 
  nest(-YEAR) %>%
  mutate(
    data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    psu_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-REGION, protected_status))),
    psu_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-REGION, -protected_status), index = 'simpson'))),
    psu_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-REGION, -protected_status), index = 'shannon'))))

Dry Tortugas PSU

psu_dtdensity_1999_2016 <- read_csv("big_csv/density_psu/psu_dtdensity_1999_2016.csv",
col_types = cols(
  YEAR = col_integer(),
  REGION = col_character(),
  SPECIES_CD = col_character(),
  density = col_double(),
  var = col_double(),
  n = col_integer(),
  nm = col_integer(),
  N = col_double(),
  NM = col_double(),
  protected_status = col_character()
))

dt_strata_diversity = strata_dtdensity_1999_2016 %>%
  group_by(YEAR, SPECIES_CD, density, protected_status)%>% 
  nest(-YEAR) %>%
  mutate(
      data_wide = map(data, ~ spread(data=.x, SPECIES_CD, density, fill =0)), 
    psu_richness = map( #species richness
      data_wide, 
      function(x) specnumber(x %>% select(-REGION, protected_status))),
    psu_simpson = map( #simpson diversity as effective number of species 
      data_wide,
      function(x) 1/(1 - diversity(x %>% select(-REGION, -protected_status), index = 'simpson'))),
    psu_shannon = map( #shannon diversity as effective number of species 
      data_wide,
      function(x) exp(diversity(x %>% select(-REGION, -protected_status), index = 'shannon'))))

Evironmental Variables from SERC