Wrangling FoCo Tree Data for F311

Show the code
library(tidyverse)
Show the code
# downloaded all species and counts from: https://fortcollinsco.treekeepersoftware.com/index.cfm?deviceWidth=1366 ; only looking at tree species and count

df <- read_csv("../F311 Spring 2024/data/foco_trees.csv") %>% janitor::clean_names()

This data is available from: https://fortcollinsco.treekeepersoftware.com/index.cfm?deviceWidth=1366

Here, I have cleaned up the data (removing misspellings, combining varietals, etc) and matched species to their genus and family level associations to aid in applying to 10:20:30 “rule”.

Use the datatable (which can be exported, if desired) to answer the questions.

Show the code
# there are a bunch of common names and random crap in the species names...remove to yield genus and species separately. Then will need to join to something to identify families (to avoid having to do it manually...but wouldn't be so bad with generic level)

df_cleaned <- df %>% 
  mutate(extracted = str_extract(x1, "\\((.*?)\\)")) %>% 
# Step 2: Remove any text that is inside single quotes along with the quotes
  mutate(cleaned = str_replace_all(extracted, "'[^']*'", "")) %>% 
# Step 3: Remove special characters and all text following them
  mutate(cleaned = str_replace(cleaned, "[?\"'].*$", "")) %>% 
# Step 4: Remove 's' with whitespace around it and all text following it
  mutate(cleaned = str_replace(cleaned, "\\s+s\\s+.*$", "")) %>% 
# Final cleanup: Remove parentheses and trim whitespace
  mutate(binomial = str_trim(str_replace_all(cleaned, "[()]", ""))) %>% 
  mutate(binomial = str_remove(binomial, " Semi-Dwarf")) %>% 
  mutate(binomial = str_remove(binomial, "P.A.F.")) %>% 
  mutate(binomial = str_remove(binomial, " spp.")) %>% 
  mutate(binomial = str_remove(binomial, " sp.")) %>% 
  mutate(binomial = str_remove(binomial, " spp")) %>%
  filter(!str_detect(binomial, "Stump")) %>% 
  filter(binomial != "Unknown spp.") %>% 
  filter(binomial != "Unknown") %>% 
  filter(binomial != "Vacant Site") %>% 
  filter(binomial != "Vacant Site Retired") %>% 
  filter(binomial != "Bur x Swamp White") %>% 
  mutate(binomial = case_when(
    binomial == "Quercus rober" ~ "Quercus robur",
    binomial == "Basswood" ~ "Tilia",
    binomial == "Catalpaciosa" ~ "Catalpa speciosa",
    binomial == "Euyonomous bungeanus" ~ "Euonymus bungeana",
    binomial == "Stypholobium japonicum" ~ "Styphnolobium japonicum",
    TRUE ~ binomial
  )) %>% 
  group_by(binomial) %>% 
  summarise(total = sum(totals)) %>% 
  mutate(genus = str_extract(binomial, "^\\w+")) %>% 
  dplyr::select(binomial, genus, total)


# Define the genera and families
genera_families <- data.frame(
  Genus = c(
    "Abies", "Acer", "Aesculus", "Ailanthus", "Alnus", "Amelanchier", "Betula", "Calocedrus",
    "Carpinus", "Carya", "Catalpa", "Cedrus", "Celtis", "Cercidiphyllum", "Cercis", "Cercocarpus",
    "Chamaecyparis", "Cladrastris", "Cornus", "Corylus", "Cotinus", "Crataegus", "Diospyros",
    "Elaeagnus", "Eucommia", "Euonymus", "Euyonomous", "Evodia", "Fagus", "Forestieria", "Fraxinus",
    "Ginkgo", "Gleditsia", "Gymnocladus", "Heptacodium", "Hesperocyparis", "Ilex", "Juglans", "Juniperus",
    "Koelreuteria", "Larix", "Liquidambar", "Liriodendron", "Maakia", "Maclura", "Magnolia", "Malus",
    "Morus", "Ostrya", "Paulownia", "Phellodendron", "Picea", "Pinus", "Platanus", "Populus", "Prunus",
    "Pseudotsuga", "Ptelea", "Pyrus", "Quercus", "Rhamnus", "Rhus", "Robinia", "Salix", "Sapindus",
    "Sequoiadendron", "Sorbus", "Styphnolobium", "Stypholobium", "Syringa", "Taxodium", "Thuja", "Tilia",
    "Tsuga", "Ulmus", "Xanthoceras", "Zelkova"
  ),
  Family = c(
    "Pinaceae", "Sapindaceae", "Sapindaceae", "Simaroubaceae", "Betulaceae", "Rosaceae", "Betulaceae",
    "Cupressaceae", "Betulaceae", "Juglandaceae", "Bignoniaceae", "Pinaceae", "Cannabaceae",
    "Cercidiphyllaceae", "Fabaceae", "Rosaceae", "Cupressaceae", "Fabaceae", "Cornaceae", "Betulaceae",
    "Anacardiaceae", "Rosaceae", "Ebenaceae", "Elaeagnaceae", "Eucommiaceae", "Celastraceae", "Celastraceae",
    "Rutaceae", "Fagaceae", "Oleaceae", "Oleaceae", "Ginkgoaceae", "Fabaceae", "Fabaceae", "Caprifoliaceae",
    "Cupressaceae", "Aquifoliaceae", "Juglandaceae", "Cupressaceae", "Sapindaceae", "Pinaceae", "Altingiaceae",
    "Magnoliaceae", "Fabaceae", "Moraceae", "Magnoliaceae", "Rosaceae", "Moraceae", "Betulaceae",
    "Paulowniaceae", "Rutaceae", "Pinaceae", "Pinaceae", "Platanaceae", "Salicaceae", "Rosaceae", "Pinaceae",
    "Rutaceae", "Rosaceae", "Fagaceae", "Rhamnaceae", "Anacardiaceae", "Fabaceae", "Salicaceae", "Sapindaceae",
    "Cupressaceae", "Rosaceae", "Fabaceae", "Fabaceae", "Oleaceae", "Cupressaceae", "Cupressaceae", "Malvaceae",
    "Pinaceae", "Ulmaceae", "Sapindaceae", "Ulmaceae"
  )
)


df_joined <- df_cleaned %>% 
  full_join(., genera_families, by = c("genus" = "Genus")) %>% 
  mutate(family = Family) %>% 
  dplyr::select(family, genus, binomial, total) %>% 
  filter(!is.na(total))

df_joined %>% 
    DT::datatable(colnames = c("Family", "Genus", "Binomial", "Count"), 
                extensions = "Buttons", 
                options = list(iDisplayLength = 10, 
                               dom = 'Bfrtip',
                               buttons = c('copy', 'csv', 'excel', 'pdf')))
Show the code
#find counts by species

df_species <- df_joined %>% 
  mutate(full_total = sum(total),
         sp_percent = round((total/full_total)*100,2)) %>% 
  arrange(desc(sp_percent))

df_species %>% 
  DT::datatable(colnames = c("Family", "Genus", "Binomial", "Total Species", "Total All Trees", "Percent Species"), 
                extensions = "Buttons", 
                options = list(iDisplayLength = 10, 
                               dom = 'Bfrtip',
                               buttons = c('copy', 'csv', 'excel', 'pdf')))
Show the code
#find counts by genus
df_genus <- df_joined %>% 
  group_by(genus) %>% 
  summarise(total = sum(total)) %>% 
  ungroup() %>% 
  mutate(full_total = sum(total),
         gn_percent = round((total/full_total)*100,2)) %>% 
  arrange(desc(gn_percent))


df_genus %>% 
    DT::datatable(colnames = c("Genus", "Total Species", "Total All Trees", "Percent Genus"), 
                extensions = "Buttons", 
                options = list(iDisplayLength = 10, 
                               dom = 'Bfrtip',
                               buttons = c('copy', 'csv', 'excel', 'pdf')))
Show the code
#find counts by family
df_family <- df_joined %>% 
  group_by(family) %>% 
  summarise(total = sum(total)) %>% 
  ungroup() %>% 
  mutate(full_total = sum(total),
         fm_percent = round((total/full_total)*100,2)) %>% 
  arrange(desc(fm_percent))


df_family %>% 
      DT::datatable(colnames = c("Family", "Total Species", "Total All Trees", "Percent Family"), 
                extensions = "Buttons", 
                options = list(iDisplayLength = 10, 
                               dom = 'Bfrtip',
                               buttons = c('copy', 'csv', 'excel', 'pdf')))