Analysis of FIA data

Author

D. S. Fernández del Viso

Objective

Calculate the relative frequency of each species in each inventory cycle, to answer the questions:

  • - What are the most common species in each inventory cycle?

  • - Did their frequencies change after Hurricane María?

Data

# Read the master table (PR_TREE.csv) into a data frame.
master <- read.csv(file = "PR_CSV/PR_TREE.csv")

Create data frame of species count by year

library(tidyr)
library(dplyr)
# Count the rows of `master` for every species code (SPCD) and inventory year
# (INVYR), then spread the years into one column each; species/year
# combinations with no rows are filled with 0.
# `.groups = "drop"` returns an ungrouped table. Without it the result stays
# grouped by SPCD, so any later column-wise aggregate (e.g. sum()) would be
# evaluated within each species instead of over the whole column.
spcountyr <- master %>%
  group_by(SPCD, INVYR) %>%
  summarise(count = n(), .groups = "drop") %>%
  pivot_wider(names_from = INVYR, values_from = count, values_fill = 0)

Modify data frame

Sum the counts per year for each inventory cycle.

# Collapse the yearly counts into one total per inventory cycle:
# 2001-2004, 2006-2009, 2011-2014 and 2016-2019.
spcountyr <- mutate(
  spcountyr,
  Y2001_2004 = `2001` + `2002` + `2003` + `2004`,
  Y2006_2009 = `2006` + `2007` + `2008` + `2009`,
  Y2011_2014 = `2011` + `2012` + `2013` + `2014`,
  Y2016_2019 = `2016` + `2017` + `2018` + `2019`
)
# Keep only the species code and the four cycle totals.
spcountyr <- select(
  spcountyr,
  SPCD, Y2001_2004, Y2006_2009, Y2011_2014, Y2016_2019
)
# Convert every column to numeric (double); `[]` keeps the table structure.
spcountyr[] <- lapply(spcountyr, as.numeric)

Calculate relative frequency of each species in each inventory cycle

# Relative frequency of each species within each inventory cycle: the species'
# cycle total divided by the total of that cycle over all species.
# ungroup() comes first so that sum() spans every row. If the table still
# carried a per-SPCD grouping, each sum would cover a single species and every
# frequency would come out as 1. This also removes the need to reach back to
# `spcountyr$...` for the denominators.
spcountyrfreq <- spcountyr %>%
  ungroup() %>%
  mutate(freq2001_2004 = Y2001_2004 / sum(Y2001_2004),
         freq2006_2009 = Y2006_2009 / sum(Y2006_2009),
         freq2011_2014 = Y2011_2014 / sum(Y2011_2014),
         freq2016_2019 = Y2016_2019 / sum(Y2016_2019))

Species names

# Load the species reference table.
spnames <- read.csv("data/REF_SPECIES.csv")
# Attach GENUS and SPECIES to the frequency table, matching rows on SPCD.
# Only the reference columns that are needed take part in the merge, and all
# columns are picked by name, so the result does not depend on the column
# layout of REF_SPECIES.csv.
spcountyrfreqsp <- merge(
  spcountyrfreq,
  spnames[, c("SPCD", "GENUS", "SPECIES")],
  by = "SPCD"
)
# Fix the column order: code, cycle totals, cycle frequencies, then names.
spcountyrfreqsp <- spcountyrfreqsp[, c(
  "SPCD",
  "Y2001_2004", "Y2006_2009", "Y2011_2014", "Y2016_2019",
  "freq2001_2004", "freq2006_2009", "freq2011_2014", "freq2016_2019",
  "GENUS", "SPECIES"
)]
# Sort from most to least frequent in the 2016-2019 cycle.
spcountyrfreqsp <- spcountyrfreqsp[
  order(spcountyrfreqsp$freq2016_2019, decreasing = TRUE),
]

Results

Table of frequencies for the 10 most abundant tree species (2016-2019 inventory)

# gt table of the first 10 rows of spcountyrfreqsp (sorted by freq2016_2019
# above), showing GENUS, SPECIES and the four cycle frequencies rounded to
# 2 decimals. Columns are selected by name rather than by position, and
# head() returns fewer rows instead of NA rows if the table has under 10.
library(gt)
head(spcountyrfreqsp, 10)[, c(
  "GENUS", "SPECIES",
  "freq2001_2004", "freq2006_2009", "freq2011_2014", "freq2016_2019"
)] %>%
  gt() %>%
  tab_header(
    title = "Relative frequency of each species in each inventory cycle",
    subtitle = "First 10 species sorted according cycle 2016-2019"
  ) %>%
  fmt_number(
    columns = c(freq2001_2004, freq2006_2009, freq2011_2014, freq2016_2019),
    decimals = 2
  )
Relative frequency of each species in each inventory cycle
First 10 species sorted according cycle 2016-2019
GENUS SPECIES freq2001_2004 freq2006_2009 freq2011_2014 freq2016_2019
Spathodea campanulata 0.12 0.11 0.10 0.10
Guarea guidonia 0.06 0.06 0.06 0.07
Leucaena leucocephala 0.04 0.04 0.04 0.04
Tabebuia heterophylla 0.03 0.04 0.04 0.03
Cecropia schreberiana 0.03 0.03 0.03 0.03
Prestoea acuminata 0.03 0.02 0.03 0.03
Andira inermis 0.03 0.03 0.02 0.02
Prosopis pallida 0.01 0.02 0.02 0.02
Bursera simaruba 0.02 0.04 0.03 0.02
Syzygium jambos 0.03 0.02 0.02 0.02

Graph of species frequencies versus rank.

# Horizontal bar chart of the 2016-2019 relative frequency for the first 10
# rows of spcountyrfreqsp, labelled "GENUS SPECIES" and ordered by frequency.
# Columns are selected by name rather than by position; geom_col() draws bars
# whose heights are the y values (equivalent to geom_bar(stat = "identity")).
library(ggplot2)
head(spcountyrfreqsp, 10)[, c(
  "GENUS", "SPECIES",
  "freq2001_2004", "freq2006_2009", "freq2011_2014", "freq2016_2019"
)] %>%
  ggplot(aes(x = reorder(paste(GENUS, SPECIES), freq2016_2019), y = freq2016_2019)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Relative frequency of each species",
    subtitle = "First 10 species sorted according cycle 2016-2019",
    x = "Species",
    y = "Frequency"
  )