Updated: 19 December, 2019

1 Introduction

This document evaluates willow height measurement data from project inception through 2019. Data include those archived on the “Digital Collections of Colorado” (DCC) library site and more recent (2015-2019) data. These analyses are aimed at identifying potential issues such as:

  • Inconsistently named factors such as site name or well ID
  • Missing values
  • Data values outside of expected range or showing unusual patterns

DCC links:
Plant level fall current annual growth and height on experimental plots in Yellowstone’s Northern Range, 2002 - 2015: http://hdl.handle.net/10217/173649
Plant level fall current annual growth and height on observation plots in Yellowstone’s Northern Range, 2008 - 2015: http://hdl.handle.net/10217/173657

Associated Publications: Marshall Kristin N., N. Thomson Hobbs and David J. Cooper. Stream Hydrology Limits Recovery of Riparian Ecosystems After Wolf Reintroduction. Proceedings of the Royal Society. B 280, issue 1756 (April 7, 2013): 20122977. http://dx.doi.org/10.1098/rspb.2012.2977

Marshall, Kristin N., David J. Cooper, N. Thompson Hobbs. Interactions Among Herbivory, Climate, Topography and Plant Age Shape Riparian Willow Dynamics in Northern Yellowstone National Park, USA. Journal of Ecology 102, issue 3 (May 2014): 667-677. http://dx.doi.org/10.1111/1365-2745.12225

2 Data Import and Initial Processing

2.1 Functions

##### Function to read in sheets and tabs
read_sheets <- function(file){
  xlsx_file <- file
  xlsx_file %>%
    excel_sheets() %>%
    set_names() %>%
    map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46") %>% 
    mutate(file_name = file) %>% 
    select(file_name, sheet_name, everything())
}


##
read_sheets_prod <- function(file){
  xlsx_file <- file
  xlsx_file %>%
    excel_sheets() %>%
    set_names() %>%
    map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:Dw136") %>% 
    mutate(file_name = file) %>% 
    select(file_name, sheet_name, everything())
}

read_sheets_util <- function(file){
  xlsx_file <- file
  xlsx_file %>%
    excel_sheets() %>%
    set_names() %>%
    map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DQ170") %>% 
    mutate(file_name = file) %>% 
    select(file_name, sheet_name, everything())
}


## read xlsx then csv cache
read_then_csv <- function(sheet, path) {
  pathbase <- path %>%
    basename() %>%
    tools::file_path_sans_ext()
  path %>%
    read_excel(sheet = sheet) %>% 
    write_csv(paste0(pathbase, "-", sheet, ".csv"))
}

## plotting
ggTile_yr_season_site2 <- function(df){
  df %>%
  group_by(yr, season, site2) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n), color = 'white') +
  viridis::scale_fill_viridis(option = "B") +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)}


ggTile_yr_season_site <- function(df){
  df %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'white') +
  viridis::scale_fill_viridis(option = "D") +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)} 

2.2 Lookup tables

## wilid_full spp lookup
lu_wilid_full_spp <- read_csv("./data/lu_wilid_full_spp.csv")  
## bv occ
lu_site_beav <- read_csv("./data/lu_site_site2_beav.csv")


### try to add spp info to the 18 and 19 
wilid.spp.lu2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>% 
  select(c(species, wilid_full)) %>% 
  distinct()

## change ref system to start with lu
lu.wilid.spp.v2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>% 
  select(c(species, wilid_full)) %>% 
  distinct()

plot.lu <- read_csv("./data/plot_lookup_master.csv",col_types = "ccccc")

## read in lu
lu_spp_site2 <- read_csv("./data/lu_spp_site2.csv")

2.3 DCC

## revised 20180228
# w_ht <- read_excel("data/Select_files/willow_height/WillowHt_2001_2017_Final.xlsx")

# dataframe comparisons
# setdiff(bigFrame, smallFrame) # dplyr solution for comparison
# comp <- setdiff(w_ht,w_ht2)

# Using functions in "comparison" package
# comparison <- compare(w_ht,w_ht2,allowAll=TRUE)

## Continuing with the file downloaded from the GDr 20180321
w_ht <- read_excel("./data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.xlsx")

w_ht %>%
  # distinct(season)
  mutate(year = as.character(year)) %>% 
  group_by(site, season, year) %>% 
  tally() %>% 
  ggplot(aes(year, site)) +
  geom_tile(aes(fill = n)) +
  facet_wrap(~season) +
  labs(caption = "data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.xlsx") +
  theme(axis.text.x = element_text(angle = 55, hjust = 1))

w_ht01_17 <- read_excel("data/raw/ht/WillowHt_2001_2017_QAQCd_Version.xlsx")

w_ht <- bind_rows(w_ht, w_ht01_17) %>% 
  # select(-c(ID, exp, dam, browse)) %>% 
  filter(plantht != "NA") %>%
  rename(pl_ht_cm = plantht) %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  distinct()

### these files have no species id

Interogating other raw files looking for spring data

w_ht01_17 %>% 
  group_by(year, season, site) %>%
  tally() %>% 
  ggplot(aes(as.character(year), season)) +
  geom_tile(aes(fill = n)) +
  labs(caption = "WillowHt_2001_2017_QAQCd_Version.xlsx")

> Still missing spring data…

2.3.1 Raw DCC spring

## MORE SPRING DATA?!
## the experimental
nsf.ht.spr <- read_delim("./data/NSF_DataArchive20180208/Plant_level_spring_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGspring2003-2015.txt", delim = " ")

nsf.ht.spr.obs <- read_delim("./data/NSF_DataArchive20180208/Plant_level_spring_current_annual_growth_and_height_on_observation_plot/OBS-plantCAGspring2009-2013.txt", delim = " ", col_types = "cccccddcccc")

# clean. rename
nsf.ht.spr <- nsf.ht.spr %>% 
  select(year, site, plot, species, willid, plantht) %>% 
  rename(yr = year, wilid = willid, pl_ht_cm = plantht) %>% 
  mutate(season = "spring")

nsf.ht.spr.obs <- nsf.ht.spr.obs %>% 
  select(year, site, plot, species, willid, plantht) %>% 
  rename(yr = year, wilid = willid, pl_ht_cm = plantht)

# clean. na
nsf.ht.spr <- nsf.ht.spr %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  mutate(season = "spring") %>% 
  mutate(yr = as.character(yr)) %>% 
  mutate(wilid = as.character(wilid))

nsf.ht.spr.obs <- nsf.ht.spr.obs %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  mutate(season = "spring") %>% 
  mutate(plot = "obs")

## combine obs and exp for spring
nsf.ht.spr <- bind_rows(nsf.ht.spr,nsf.ht.spr.obs)

# clean
nsf.ht.spr <- nsf.ht.spr %>%
  mutate(plot = if_else(is.na(site),"obs",plot)) %>%
  filter(!is.na(plot))

nsf.ht.spr %>% 
  # distinct(site)
  visdat::vis_miss() +
  labs(caption = "nsf.ht.spr")

nsf.ht.spr %>% 
  mutate(site2 = paste0(site,"-",plot)) %>% 
  ggTile_yr_season_site2() +
  labs(caption = "nsf.ht.spr")

nsf.ht.spr %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  group_by(site2, yr) %>% 
  tally() %>% 
  ungroup() %>% 
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n)) +
  labs(title = "combined obs and exp files from DCC")

2.3.2 Fa raw DCC

## Fa raw DCC
nsf.ht.fa.exp <- read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGfall2002-2015.txt", delim = " ") %>% 
  select(year, site, plot, species, willid, height) %>% 
  rename(yr = year, wilid = willid, pl_ht_cm = height) %>% 
  mutate(season = "fall")

nsf.ht.fa.obs <- read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_observation_plot/OBS_shootCAGfall08-15.txt", delim = " ") %>% 
  select(year, site, plot, species, willid, height) %>% 
  rename(yr = year, wilid = willid, pl_ht_cm = height) %>% 
  mutate(season = "fall")

## combine raw dcc exp and obs
nsf.ht.fa.expobs <- bind_rows(nsf.ht.fa.exp, nsf.ht.fa.obs) %>% 
  distinct() %>% 
  mutate(yr = as.character(yr), wilid = as.character(wilid))

## dcc combined
dcc.ht.fa.spr <- bind_rows(nsf.ht.fa.expobs, nsf.ht.spr) %>%
  mutate(wilid = as.character(wilid)) %>% 
  distinct()

dcc.ht.fa.spr %>% 
  mutate(site2 = paste0(site,"-",plot)) %>% 
  ggTile_yr_season_site2() +
  labs(caption = "dcc.ht.fa.spr")

# clean. na
dcc.ht.fa.spr <- dcc.ht.fa.spr %>% 
  filter(!is.na(pl_ht_cm))

## 
dcc.ht.fa.spr <- dcc.ht.fa.spr %>% 
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>% 
  mutate(wilid_full = paste0(site,"-",plot, "-", wilid)) 

dcc.ht.fa.spr %>%
  visdat::vis_miss()

dcc.ht.fa.spr %>% 
  visdat::vis_guess()

# dcc.ht.fa.spr %>% 
#   # filter(is.na(species)) %>% 
#   filter(yr != "910" & yr != "189" & yr !='36415' & yr != "355" & yr != "379") %>% 
#   distinct(yr, season) %>% 
#   gt()

## lu export
# dcc.wilid.spp.lu <- dcc.ht.fa.spr %>% 
#   distinct(site, plot, species, wilid, wilid_full)
# 
# dcc.wilid.spp.lu %>% 
#   datatable()

## export
# dcc.wilid.spp.lu %>% 
#   write_csv("./data/dcc_wilid_spp_lu.csv")
### Pre-process data
# 
# * Type conversions (e.g., ID's, treatment codes to character) 
# * Identify then filter NA/missing for "plantht" 
# * Create fields defining treatment classes and incorporate into siteid ("siteid2", as defined in the water table dataset) 
# * Create field combining willowid (not unique across all records) with site2  
# 
# # RETURN HERE FOR FOR MORE QA QC
# 
# # convert willow id, dam, browse, exp to character instead of integer
# w_ht <- w_ht %>% 
#   mutate(willid = as.character(willid)) %>% 
#   mutate(exp = as.character(exp)) %>% 
#   mutate(dam = as.character(dam)) %>% 
#   mutate(browse = as.character(browse))
# 
# # create table of just the missing/NA for inspection
# w_ht %>%
#   filter(plantht == "na" | plantht == "missing" | plantht == "NA") %>% 
#   datatable(caption = "Records with missing/NA/na for plantht.")
# 
# ## filter out "NA", "na" and "missing"; convert "plantht" to numeric; create treatment classes in incorporate into new ID fields for summarization
# # w_ht <- w_ht %>%
# #   filter(plantht != "na" & plantht != "missing" & plantht != "NA") %>%
# #   mutate(plantht = as.numeric(plantht)) %>%
# #   mutate(treat = case_when(dam == 1 & browse == 0 ~ "DX",
# #                            dam == 1 & browse == 1 ~ "DC",
# #                            dam == 0 & browse == 0 ~ "CX",
# #                            dam == 0 & browse == 1 ~ "CC",
# #                            dam == 2 & browse == 1 ~ "OBS")) %>%
# #   mutate(site2 = paste(site, treat, sep = '-')) %>%
# #   mutate(willid.site2 = paste(willid, site2, sep = '-'))
# # Note that this doesn't correctly parse the exp and obs sites, as TH pointed out. 
# # So instead see the following modified code:
###

2.3.3 w_ht issue

w_ht <- w_ht %>%
  filter(pl_ht_cm != "na" & pl_ht_cm != "missing" & pl_ht_cm != "NA") %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  rename(wilid = willid) %>% 
  mutate(wilid_full = paste0(wilid,"=", site)) 

## don't think I need this bc use of lu
w_ht <- w_ht %>%
#   # rename(pl_ht_cm = plantht) %>% 
  mutate(plot = case_when(dam == 1 & browse == 0 ~ "dx",
                           dam == 1 & browse == 1 ~ "dc",
                           dam == 0 & browse == 0 ~ "cx",
                           dam == 0 & browse == 1 & exp == 1 ~ "cc",
                           dam == 0 & browse == 1 & exp != 1 ~ "obs",
                           dam == 0 & browse == 1 ~ "cc",
                           dam == 2 & browse == 1 ~ "obs",
                           TRUE ~ "oth")) %>% 
  mutate(site2 = paste0(site, plot, sep = '-'))
  

# not sure on the interpretation of the "2"
# w_ht %>% distinct(dam) # Three values: 0,1,2. Expected only 0,1.
# added the site2 field to match that used in well data 

# Tip: if you are testing a scalar, use if(). Testing a vector against a single condition, dplyr::if_else. Testing a vector against multiple conditions, use case_when.
w_ht <- w_ht %>% 
  mutate_if(.predicate = is.character,.funs=tolower)

w_ht <- w_ht %>% 
  mutate(site = case_when(site == "halibuff"~"hailbuf",
                          TRUE ~ site))

w_ht %>% 
  visdat::vis_dat()

# w_ht %>%
#   distinct(site) %>%
#   View()

2.4 2016 Production

p16.raw <- read_excel("./data/raw/production/2016_Raw_Production_20180421.xlsx", col_types = "text") %>% 
  janitor::clean_names()
 
p16.raw <- p16.raw %>% 
  rename(wilid = wildid) %>% 
  rename(species = spp) %>% 
  rename(pl_ht_cm = height_cm) %>%
  rename(live_dead = live) %>% 
  mutate(season = "spring") %>% 
  mutate(yr = as.character("2019"))

# some cleaning
p16.raw <- p16.raw %>% 
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
  mutate(site2 = paste0(site, "-", "plot")) %>%  
  filter(wilid != "?") %>% 
  mutate(wilid = str_remove(.$wilid, ".0"))


p16.ht <- p16.raw %>% 
  select(c(site, site2, plot, species, wilid, pl_ht_cm, site)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid))

p16.ht %>% 
  visdat::vis_miss()

# p16.ht <- p16.ht %>% 
#   left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
#   mutate(species = case_when(is.na(species.x)~species.y,
#                              is.na(species.y)~species.x,
#                              TRUE ~ "NA")) %>% 
#   select(-c(species.x, species.y))

p16.ht <- 
  p16.ht %>% 
  # filter(!is.na(pl_ht_cm)) %>% 
  filter(!is.na(wilid))

p16.ht %>% 
  visdat::vis_dat()

# u16.ht %>% 
#   filter(is.na(species))
p16.ht <- p16.ht %>%
  select(-c(species)) %>%
  mutate(yr = as.character("2016")) %>%
  mutate(season = "fall") %>%
  distinct()
  
p16.ht %>%   
  ggTile_yr_season_site() +
  labs(caption = "p16.ht")

2.5 2016 Utilization

# 2016 utilization
u16.raw <- read_excel("./data/raw/utilization/Raw_2016_Utilization/2016_Raw_Utilization.xlsx", col_types = "text") %>% 
  janitor::clean_names()

u16.raw <- u16.raw %>% 
  rename(wilid = wildid) %>% 
  mutate(wilid = as.numeric(wilid)) %>% 
  mutate(wilid = as.character(wilid)) %>%
  rename(species = spp) %>% 
  rename(pl_ht_cm = ht_cm) %>%
  rename(live_dead = live) %>% 
  mutate(season = "spring") %>% 
  mutate(yr = as.character('yr'))

# some cleaning
u16.raw <- u16.raw %>% 
  mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
  mutate(site = if_else(site == "xtal","crystal",site)) %>% 
  mutate(site2 = paste0(site, "-", "plot")) 

u16.raw %>% 
  distinct(site, plot) %>% 
  datatable()
u16.ht <- u16.raw %>% 
  select(c(site, site2, plot, species, wilid, pl_ht_cm, site, live_dead)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid))

u16.ht %>% 
  visdat::vis_miss()

# './data/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv'

u16.ht <- u16.ht %>% 
  left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
  mutate(species = case_when(is.na(species.x)~species.y,
                             is.na(species.y)~species.x,
                             TRUE ~ "NA")) %>% 
  select(-c(species.x, species.y))

u16.ht %>% 
  visdat::vis_dat()

# u16.ht <- u16.ht %>% 
#   filter(!is.na(pl_ht_cm))

# u16.ht %>% 
#   filter(is.na(species))
ht_lm_entry <- read_csv("./data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv")

ht_lm_entry <- ht_lm_entry %>% 
  janitor::clean_names() %>% 
  select(-id) %>% 
  rename(wilid = willid) %>% 
  mutate(plot = case_when(dam == "1" & browse == "1" ~ "dc",
                          dam == "1" & browse == "0" ~ "dx",
                          dam == "0" & browse == "1" ~ "cc",
                          dam == "0" & browse == "0" ~ "cx",
                          TRUE ~ "obs"))

ht_lm_entry <- ht_lm_entry %>%
  rename(yr = year) %>%
  rename(pl_ht_cm = plantht) %>%
  mutate_if(.predicate = is.numeric,.funs = as.character) %>%
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) 

ht_lm_entry %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  group_by(yr, season, site2) %>% 
  tally() %>% 
  filter(yr == "2017") %>% 
  filter(season != "fall") %>%
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n)) +
  labs(caption = "Only 2017 data")

## only 2017 spring data
ht_lm_entry_17 <- ht_lm_entry %>%
  filter(yr == '2017')

2.6 2017 Production

p17.raw <- read_excel("./data/raw/production/2017_Raw_Production_20180423.xlsx", col_types = "text") %>% 
  janitor::clean_names()

p17.raw %>% glimpse()
## Observations: 1,122
## Variables: 79
## $ date                      <chr> "8152017", "8152017", "8152017", "8152017...
## $ technician                <chr> "tg sb ak hh", "tg sb ak hh", "tg sb ak h...
## $ site                      <chr> "wb", "wb", "wb", "wb", "wb", "wb", "wb",...
## $ plot                      <chr> "cx", "cx", "cx", "cx", "cx", "cx", "cx",...
## $ spp                       <chr> "beb", "beb", "beb", "beb", "beb", "beb",...
## $ wildid                    <chr> "257", "257", "257", "260", "260", "260",...
## $ sapsucker_wells_y_n       <chr> "y", "y", "y", "y", "y", "y", "y", "y", "...
## $ cytospora_fungus_y_n      <chr> "y", "y", "y", "y", "y", "y", "y", "y", "...
## $ height_cm                 <chr> "227", "227", "227", "402", "402", "402",...
## $ long_diameter_cm          <chr> "115", "115", "115", "360", "360", "360",...
## $ perp_diameter_cm          <chr> "65", "65", "65", "290", "290", "290", "1...
## $ number_new_dead_stems     <chr> "0", "0", "0", "0", "0", "0", "0", "0", "...
## $ number_of_new_stems       <chr> "3", "3", "3", "0", "0", "0", "0", "0", "...
## $ new_stem_length           <chr> "43", "43", "43", NA, NA, NA, NA, NA, NA,...
## $ stem_id                   <chr> "11704", "25813", "5609", "262", "260", "...
## $ live                      <chr> "live", "live", "live", "live", "live", "...
## $ meas_extent               <chr> NA, "no todos", NA, "band", "band", "band...
## $ notes                     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ check_meas                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "chec...
## $ stem_basal_mm             <chr> "206", "18.5", "13.9", "37.20000000000000...
## $ biggest_shoot_diameter_mm <chr> "na", "na", "na", "na", "na", "na", "na",...
## $ long_shoot_length_cm      <chr> "12", "15", NA, "15", "21", "82", "10", "...
## $ br_scheme                 <chr> "0", "0", "0", "0", "0", "0", "0", "0", "...
## $ ub_scheme                 <chr> "25", "20", "1", "70", "30", "100", "40",...
## $ sht_len_1                 <chr> "2", "1", "1", "3", "1", "3", "2", "2", "...
## $ sht_len_2                 <chr> "1", "3", "1", "3", "2", "4", "2", "2", "...
## $ sht_len_3                 <chr> "1", "1", "2", "2", "5", "7", "2", "2", "...
## $ sht_len_4                 <chr> "1", "1", "1", "4", "3", "4", "4", "2", "...
## $ sht_len_5                 <chr> "1", "1", "1", "1", "1", "2", "3", "2", "...
## $ sht_len_6                 <chr> "2", "1", "1", "5", "3", "2", "3", "3", "...
## $ sht_len_7                 <chr> "1", "1", "1", "1", "1", "4", "2", "4", "...
## $ sht_len_8                 <chr> "2", "1", "1", "2", "2", "4", "1", "2", "...
## $ sht_len_9                 <chr> "1", "2", "1", "2", "2", NA, "4", "6", "1...
## $ sht_len_10                <chr> "3", "1", "1", "6", "5", NA, "3", "6", "1...
## $ sht_len_11                <chr> "3", "1", "1", "3", "11", NA, NA, "1", "2...
## $ sht_len_12                <chr> "7", "1", "1", "3", "2", NA, NA, "3", "3"...
## $ sht_len_13                <chr> "6", "1", "2", "1", "11", NA, NA, NA, "2"...
## $ sht_len_14                <chr> NA, "4", "1", "2", "3", NA, NA, NA, "2", ...
## $ sht_len_15                <chr> NA, "14", "1", NA, "1", NA, NA, NA, "3", ...
## $ sht_len_16                <chr> NA, "6", "1", NA, "2", NA, NA, NA, NA, NA...
## $ sht_len_17                <chr> NA, "2", "1", NA, "3", NA, NA, NA, NA, NA...
## $ sht_len_18                <chr> NA, "2", "1", NA, "4", NA, NA, NA, NA, NA...
## $ sht_len_19                <chr> NA, NA, "5", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_20                <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_21                <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_22                <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_23                <chr> NA, NA, "2", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_24                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_25                <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_26                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_27                <chr> NA, NA, "2", NA, "7", NA, NA, NA, NA, NA,...
## $ sht_len_28                <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_29                <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_30                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_31                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_32                <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_33                <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_34                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_35                <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_36                <chr> NA, NA, "1", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_37                <chr> NA, NA, "1", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_38                <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_39                <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_40                <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_41                <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_42                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_43                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_44                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_45                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_46                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_47                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_48                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_49                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_50                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_51                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_52                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_53                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ unbrowse_remainder        <chr> "13", "8", NA, "38", "18", "82", "7", "7"...
## $ browse_remainder          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
p17.raw %>% distinct(plot)
## # A tibble: 9 x 1
##   plot 
##   <chr>
## 1 cx   
## 2 dx   
## 3 cc   
## 4 dc   
## 5 1    
## 6 <NA> 
## 7 4    
## 8 5    
## 9 6
## bad plot data entry
p17.raw <- p17.raw %>%
  # distinct(plot)
  mutate(plot = case_when(is.na(plot)~"obs", 
                          plot == "1" ~ "obs",
                          plot == "4" ~ "obs",
                          plot == "5" ~ "obs",
                          plot == "6" ~ "obs",
                          TRUE ~ plot)) 
p17.raw %>%
  tabyl(site, plot) %>% 
  gt() %>% 
  tab_header(title = "Count of observations in 2017 raw production data")
Count of observations in 2017 raw production data
site cc cx dc dx obs
crescent 0 0 0 0 16
eb1 36 28 57 60 0
eb2 18 37 0 45 0
elk 49 78 38 70 9
elk1 0 0 0 0 18
elk2 0 0 0 0 16
elk3 0 0 0 0 9
elk4 0 0 0 0 15
elk5 0 0 0 0 16
ghole 0 0 0 0 18
glen 0 0 0 0 16
halibuff 0 0 0 0 6
lb 0 0 0 0 53
lb1 0 0 0 0 13
lb2 0 0 0 0 22
lb3 0 0 0 0 19
lostc 0 0 0 0 17
lostl 0 0 0 0 15
oxbow 0 0 0 0 15
rose 0 0 0 0 12
slide 0 0 0 0 8
wb 69 71 71 67 14
wn 0 1 0 0 0
p17.raw <- p17.raw %>% 
  rename(wilid = wildid) %>% 
  rename(species = spp) %>% 
  rename(pl_ht_cm = height_cm) %>%
  rename(live_dead = live) %>% 
  mutate(season = "fall") %>% 
  mutate(yr = "2017") %>% 
  mutate(yr = as.character(yr))

# some cleaning
p17.raw <- p17.raw %>% 
  # mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
  mutate(site2 = paste0(site, "-", plot)) 

p17.ht <- p17.raw %>% 
  select(c(yr, season, site, site2, plot, species, wilid, pl_ht_cm, live_dead)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid))

# './data/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv'

# p17.ht <- p17.ht %>% 
#   left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
#   mutate(species = case_when(is.na(species.x)~species.y,
#                              is.na(species.y)~species.x,
#                              TRUE ~ "NA")) %>% 
#   select(-c(species.x, species.y))

p17.ht <- p17.ht %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm))

p17.ht %>% 
  visdat::vis_dat()

p17.ht %>% 
  tabyl(site2,plot) %>% 
  gt() %>% 
  tab_header(title = "2017 Fall Count by Site2 and Plot")
2017 Fall Count by Site2 and Plot
site2 cc cx dc dx obs
crescent-obs 0 0 0 0 16
eb1-cc 34 0 0 0 0
eb1-cx 0 28 0 0 0
eb1-dc 0 0 53 0 0
eb1-dx 0 0 0 59 0
eb2-cc 18 0 0 0 0
eb2-cx 0 37 0 0 0
eb2-dx 0 0 0 45 0
elk-cc 45 0 0 0 0
elk-cx 0 77 0 0 0
elk-dc 0 0 37 0 0
elk-dx 0 0 0 70 0
elk-obs 0 0 0 0 9
elk1-obs 0 0 0 0 4
elk2-obs 0 0 0 0 13
elk3-obs 0 0 0 0 5
elk4-obs 0 0 0 0 15
elk5-obs 0 0 0 0 16
ghole-obs 0 0 0 0 18
glen-obs 0 0 0 0 16
halibuff-obs 0 0 0 0 6
lb-obs 0 0 0 0 53
lb1-obs 0 0 0 0 13
lb2-obs 0 0 0 0 22
lb3-obs 0 0 0 0 16
lostc-obs 0 0 0 0 17
lostl-obs 0 0 0 0 15
oxbow-obs 0 0 0 0 15
rose-obs 0 0 0 0 12
slide-obs 0 0 0 0 8
wb-cc 69 0 0 0 0
wb-cx 0 71 0 0 0
wb-dc 0 0 71 0 0
wb-dx 0 0 0 67 0
wb-obs 0 0 0 0 14
wn-cx 0 1 0 0 0
p17.ht <- p17.ht %>% 
  mutate(site = case_when(site == "wn" ~ "wb",
         TRUE ~ site))

p17.ht %>% 
  distinct(site2, plot, wilid) %>% 
  datatable()
p17.ht %>%  distinct(wilid_full) %>% datatable()
## issue with obs plots having 
p17.ht %>% 
  ggTile_yr_season_site2() +
  labs(caption = "p17.ht")

2.7 2017 Utilization

# 2017 utilization
u17.raw <- read_excel("./data/raw/utilization/Raw_2017_Utilization/2017_Raw_Utilization.xlsx", col_types = "text") %>% 
  janitor::clean_names()

u17.raw <- u17.raw %>% 
  rename(wilid = wildid) %>% 
  rename(species = spp) %>% 
  rename(pl_ht_cm = ht_cm) %>%
  rename(live_dead = live) %>% 
  mutate(season = "spring") %>% 
  mutate(yr = as.character("2017"))

# some cleaning
u17.raw <- u17.raw %>% 
  mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
  mutate(site2 = paste0(site, "-", plot)) 
#
u17.raw <- u17.raw %>%
  filter(!is.na(pl_ht_cm)) %>%  
  select(c(yr, site, site2, plot, wilid, season, yr, pl_ht_cm, live_dead)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid)) %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  filter(pl_ht_cm < 800)

# u17.raw %>% 
#   distinct(wilid) %>% 
#   View()

## filter out the 
u17.ht <- u17.raw %>% 
  filter(live_dead != "DEAD" & 
           live_dead != "dead" &
           live_dead != "missing")

### summarizing by wilid
u17.ht <- u17.ht %>% 
  select(-live_dead) %>% 
  group_by(yr, site, site2, season, plot, wilid, wilid_full) %>% 
  summarise(pl_ht_cm = max(pl_ht_cm)) %>% 
  ungroup()

u17.ht %>%
  visdat::vis_miss() +
  labs(caption = "missing data, raw 2017 data")

u17.ht %>% 
  visdat::vis_dat() +
  labs(caption = "u17.ht")

u17.ht %>% 
  glimpse()
## Observations: 378
## Variables: 8
## $ yr         <chr> "2017", "2017", "2017", "2017", "2017", "2017", "2017", ...
## $ site       <chr> "crescent", "crescent", "crescent", "crescent", "crescen...
## $ site2      <chr> "crescent-obs", "crescent-obs", "crescent-obs", "crescen...
## $ season     <chr> "spring", "spring", "spring", "spring", "spring", "sprin...
## $ plot       <chr> "obs", "obs", "obs", "obs", "obs", "cc", "cc", "cc", "cc...
## $ wilid      <chr> "86", "89", "92", "95", "98", "493", "499", "602", "605"...
## $ wilid_full <chr> "crescent-obs-86", "crescent-obs-89", "crescent-obs-92",...
## $ pl_ht_cm   <dbl> 56, 56, 51, 40, 49, 84, 97, 85, 123, 111, 104, 137, 103,...
u17.ht <- u17.ht %>% 
  mutate(plot = case_when(site2 == "elk4-35" ~"obs",
                          TRUE ~ plot)) %>%
  mutate(plot = case_when(site2 == "elk3-35" ~"obs",
                          TRUE ~ plot)) %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(wilid_full = paste0(site2,"-",wilid))

u17.ht %>% 
  ggTile_yr_season_site2() +
  theme_grey()

gg.u17 <- u17.ht %>% 
  ggplot(aes(x = site2, y = pl_ht_cm)) +
  geom_point(aes(color = plot))  +
  coord_flip()

plotly::ggplotly(gg.u17)
## issue with obs plots having 
u17.ht %>% 
  ggTile_yr_season_site2() +
  labs(caption = "u17.ht")

2.8 2018 Production

# updated as of 20191203
files2018 <- fs::dir_ls("./data/raw/production/Raw_2018_Production", recurse = FALSE, glob = "*.xlsx")

## csv cache
# path <- readxl_example("datasets.xlsx")
# path %>%
#   excel_sheets() %>%
#   set_names() %>% 
#   map(read_then_csv, path = path)

## not working as intended
# read_then_csv2 <- function(fpath){
#   fpath %>% 
#   excel_sheets() %>%
#   set_names() %>%
#   map(read_then_csv, path = fpath)
# } 
#   
# files2018 %>% 
#   map(.x = .,read_then_csv2)

# library(readxl)

## orignal way. works but only reads in one sheet!!
# #for each excel file name, read excel sheet and append to df
# p18 <- files2018 %>% 
#   map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column

#### running the above function on 2018 data
# p18 <- files2018 %>% 
#   map_df(~ read_sheets(.))

p18 <- files2018 %>% 
  map_df(~ read_sheets_prod(.))

p18 <- p18 %>% 
  janitor::clean_names() %>% 
  mutate(yr = 2018) %>% 
  mutate(season = "fall")
# p19 %>% 
#   names() %>%
#   tibble::enframe(name = NULL) %>%
#   datatable()
## Select subset and type cast
p18sel <- p18 %>% 
  select(file_name, sheet_name, yr, season, site, plot, spp, wilid, pl_ht_cm, stid, live_status) 


# obs for na
p18sel <- p18sel %>% 
  mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))
  
p18sel %>% 
  glimpse()
## Observations: 4,422
## Variables: 11
## $ file_name   <chr> "./data/raw/production/Raw_2018_Production/crescent_obs...
## $ sheet_name  <chr> "Sheet1", "Sheet1", "Sheet1", "Sheet1", "Sheet1", "Shee...
## $ yr          <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2...
## $ season      <chr> "fall", "fall", "fall", "fall", "fall", "fall", "fall",...
## $ site        <chr> "cresc", "cresc", "cresc", "cresc", "cresc", "cresc", "...
## $ plot        <chr> "obs", "obs", "obs", "obs", "obs", "obs", "obs", "obs",...
## $ spp         <chr> "geyer", "geyer", "geyer", "geyer", "geyer", "geyer", "...
## $ wilid       <chr> "86", "86", "86", "86", "86", "86", "89", "89", "89", "...
## $ pl_ht_cm    <chr> "108", "108", "108", "108", "108", "108", "68", "68", "...
## $ stid        <chr> "86", "86", "87", "87", "88", "88", "89", "89", "90", "...
## $ live_status <chr> "dead", "dead", "live", "live", "live", "live", "missin...
p18sel %>% 
  visdat::vis_dat()

# p18sel %>% 
#   distinct() %>% 
#   select(-date) %>% 
#   filter(is.na(pl_ht_cm)) %>% 
#   datatable(caption = "Missing plant ht - 2018 production")

# eliminate duplicate rows 
p18sel <- p18sel %>% 
  distinct() %>% 
  # select(-date) %>% 
  filter(!is.na(pl_ht_cm))

visdat::vis_dat(p18sel)

2.9 2019 Production

# updated as of 20191203
files2019 <- fs::dir_ls("./data/raw/production/Raw_2019_Production", recurse = FALSE, glob = "*.xlsx")

# ## original way
# #for each excel file name, read excel sheet and append to df
# p19 <- files2019 %>% 
#   map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# 

#### running the above function on 2018 data
# p19 <- files2019 %>% 
#   map_df(~ read_sheets(.))

p19 <- files2019 %>% 
  map_df(~ read_sheets_prod(.))

p19 <- p19 %>% 
  janitor::clean_names() %>% 
  mutate(yr = 2019) %>% 
  mutate(season = "fall")

p19 %>% 
  names() %>%
  tibble::enframe(name = NULL) %>%
  datatable()
## Select subset and type cast
p19sel <- p19 %>% 
  select(file_name, sheet_name, yr, season, site, plot, spp, wilid, pl_ht_cm, stid, live_status) 

p19sel <- p19sel %>% 
  mutate(plot = if_else(condition = (is.na(plot)), true = "obs", plot)) 

# eliminate duplicate rows 
p19sel <- p19sel %>% 
  distinct() 

## combine 18 and 19
p18p19sel <- bind_rows(p18sel, p19sel)

visdat::vis_miss(p18p19sel)

# make all lower
p18p19sel <- p18p19sel %>%  
  mutate_if(.predicate = is.character,.funs=tolower)

p18p19sel <- p18p19sel %>%
  filter(!is.na(pl_ht_cm))
# remove records that have na for wilid
p18p19sel <- p18p19sel %>% 
  filter(!is.na(wilid)) %>%
  filter(!is.na(site)) %>% 
  filter(!is.na(pl_ht_cm))

## summarize the willow height by wilid and year
p1819.wilidht <- p18p19sel %>% 
  group_by(yr, site, spp, wilid, live_status) %>% 
  summarise(pl_ht_cm = max(pl_ht_cm)) %>% 
  ungroup()

# eliminate the na for plant height
# p1819.wilidht %>%
#   filter(is.na(pl_ht_cm)) %>% 
#   gt() %>% 
#   tab_header(title = "Records with NA for plant ht ")

# eliminate the na for plant height
p1819.wilidht <- p1819.wilidht %>%
  filter(!is.na(pl_ht_cm))

2.10 2018 Utilization

ufiles2018 <- fs::dir_ls("./data/raw/utilization/Raw_2018_Utilization", recurse = FALSE, glob = "*.xlsx")

## single sheet
# u18 <- ufiles2018 %>%
#   map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text"))

## add code to read multitabs...
# u18 <- ufiles2018 %>% 
#   map_df(~ read_sheets(.))

u18 <- ufiles2018 %>% 
  map_df(~ read_sheets_util(.))

# u18 %>% distinct(file_name)

# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column

u18 <- u18 %>% 
  janitor::clean_names() %>% 
  mutate(yr = 2018) %>% 
  mutate(season = "spring") 
## whack a mole.
# change lb41 to lb4
u18 <- u18 %>% 
  mutate(site = case_when(site == "lb41" ~ "lb4",
                          TRUE ~ site)) 
# u18 %>% 
#   glimpse()
u18 %>% 
  names() %>%
  tibble::enframe(name = NULL) %>% 
  gt() %>% 
  tab_header(title = "variable names")
variable names
value
file_name
sheet_name
date
tech_initial
site
plot
spp
wilid
stid
yr_added
pl_ht_cm
live_status
meas_extent
tag_type
notes_util_2018
notes_prod_2018
rem
sl
scheme
len_dia
x1a
x1b
x2a
x2b
x3a
x3b
x4a
x4b
x5a
x5b
x6a
x6b
x7a
x7b
x8a
x8b
x9a
x9b
x10a
x10b
x11a
x11b
x12a
x12b
x13a
x13b
x14a
x14b
x15a
x15b
x16a
x16b
x17a
x17b
x18a
x18b
x19a
x19b
x20a
x20b
x21a
x21b
x22a
x22b
x23a
x23b
x24a
x24b
x25a
x25b
x26a
x26b
x27a
x27b
x28a
x28b
x29a
x29b
x30a
x30b
x31a
x31b
x32a
x32b
x33a
x33b
x34a
x34b
x35a
x35b
x36a
x36b
x37a
x37b
x38a
x38b
x39a
x39b
x97
x98
x99
x100
x101
x102
x103
x104
x105
x106
x107
x108
x109
x110
x111
x112
x113
x114
x115
x116
x117
x118
x119
x120
x121
tech_initials
willid
height_cm
notes_prod_2017
notes_util_2019
notes_prod_2019
x2017_production_notes
x2018_utilization_notes
x78
x79
x80
x81
x82
x83
x84
x85
x86
x87
x88
x89
x90
x91
x92
x93
x94
x95
x96
date_year_month_day
x40a
x40b
x41a
x41b
x42a
x42b
x43a
x43b
x44a
x44b
x45a
x45b
x46a
x46b
x47a
x47b
x48a
x48b
x49a
x49b
ht_cm
yr
season
u18 %>% 
  distinct(site) %>% gt()
site
cresc
NA
eb1
eb2
elk
ghole
glen
hailbuff
lava
lb1
lb2
lb3
lb4
lb5
lb6
lost c
lost l
oxbow
rose
slide
wb
wb4
wb2
wb1
yancy
## WB?
## BIG fix: pl_ht and pl_ht_cm
## willid and wilid
u18 %>% 
  select(file_name,contains("cm")) %>% 
  distinct()
## # A tibble: 231 x 4
##    file_name                                            pl_ht_cm height_cm ht_cm
##    <chr>                                                <chr>    <chr>     <chr>
##  1 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 57       <NA>      <NA> 
##  2 ./data/raw/utilization/Raw_2018_Utilization/crescen~ NA       <NA>      <NA> 
##  3 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 50       <NA>      <NA> 
##  4 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 52       <NA>      <NA> 
##  5 ./data/raw/utilization/Raw_2018_Utilization/crescen~ <NA>     <NA>      <NA> 
##  6 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA>     107       <NA> 
##  7 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA>     108       <NA> 
##  8 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA>     136       <NA> 
##  9 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA>     <NA>      <NA> 
## 10 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA>     129       <NA> 
## # ... with 221 more rows
u18 <- u18 %>% 
  mutate(pl_ht_cm = case_when(is.na(height_cm)~ ht_cm,
                              is.na(pl_ht_cm)~height_cm,
                              TRUE ~ pl_ht_cm)) %>%
  mutate(wilid = case_when(is.na(wilid) ~ willid,
                              is.na(willid)~ wilid,
                              TRUE ~ wilid))%>% 
  filter(!is.na(pl_ht_cm)) %>%
  select(-c(height_cm,willid))

u18 %>% 
  select(file_name,contains("cm")) %>% 
  distinct() %>% 
  datatable()
### select subset of columns: for height
u18.ht <- u18 %>%
  # glimpse()
  select(c('file_name','yr','season', 'spp', 'plot','site','wilid','stid', 'live_status', 'pl_ht_cm'))

# u18.ht %>% 
#   filter(site == "wb")

u18.ht %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  # filter(!is.na(site)) %>% 
  distinct(site) %>% 
  gt::gt() %>% 
  tab_header(title = "Distinct site on u18 import")
Distinct site on u18 import
site
eb1
eb2
elk
ghole
glen
hailbuff
lava
lb1
lb2
lb3
lb4
lb5
lb6
lost c
lost l
oxbow
rose
slide
wb
wb4
wb2
wb1
# u18.ht %>% 
#   distinct(pl_ht_cm) %>% 
#   View() # some rows have na entred as text

# rename, reclass
u18.ht <- u18.ht %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))

# clean
u18.ht <- u18.ht %>% 
  distinct()

# more cleaning
u18.ht <- u18.ht %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  filter(wilid != "?") %>% 
  mutate(wilid = str_remove(.$wilid, ".0")) # remove the ".0" part of strings
##########
## calculate the per wilid height from population of stid
##########
u18.ht <- u18.ht %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  group_by(yr, wilid, site, spp, plot, season, live_status) %>% 
  summarise(pl_ht_cm = max(pl_ht_cm)) %>% 
  ungroup()

u18.ht <- u18.ht %>% 
  filter(!is.na(wilid)) %>%
  filter(!wilid == "") %>% 
  mutate(wilid = str_remove(.$wilid, ".0"))

u18.ht %>% 
  distinct(site) 
## # A tibble: 21 x 1
##    site    
##    <chr>   
##  1 lb3     
##  2 lost l  
##  3 slide   
##  4 wb      
##  5 wb2     
##  6 hailbuff
##  7 lost c  
##  8 lb1     
##  9 eb1     
## 10 lava    
## # ... with 11 more rows
## where is wb???
u18.ht %>% 
  visdat::vis_miss()

2.10.0.1 Diagnostic plots

u18.ht %>%
  # visdat::vis_guess() + 
  # visdat::vis_miss() +
  visdat::vis_dat() +
  theme_classic() +
  labs(title = "2018 Spring Height", subtitle = "Missing map for qa/qc") +
  scale_fill_viridis(discrete = TRUE, option = "D") +
  coord_flip()

u18.plot1 <- u18.ht %>% 
  mutate(yr = as.character(yr)) %>% 
  ggplot(aes(site, pl_ht_cm)) +
  # geom_boxplot(aes(color = site)) +
  geom_jitter(alpha = .5)
plotly::ggplotly(u18.plot1)  

2.11 2019 Utilization

### read_sheets
# read_sheets <- function(file){
#   xlsx_file <- file
#   xlsx_file %>%
#     excel_sheets() %>%
#     set_names() %>%
#     map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46") %>% 
#     mutate(file_name = file) %>% 
#     select(file_name, sheet_name, everything())
# }

#### 
ufiles2019 <- fs::dir_ls("./data/raw/utilization/Raw_2019_Utilization", recurse = FALSE, glob = "*.xlsx")

# u19 <- ufiles2019 %>%
#   map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text"))

## multitab
# u19 <- ufiles2019 %>% 
#   map_df(~ read_sheets(.))

u19 <- ufiles2019 %>% 
  map_df(~ read_sheets_util(.))

# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column

u19 <- u19 %>% 
  janitor::clean_names() %>% 
  mutate(yr = 2019) %>% 
  mutate(season = "spring") 

u19 %>% 
  names() %>%
  tibble::enframe(name = NULL) %>%
  datatable()
u19 <- u19 %>% 
  # mutate(pl_ht_cm = case_when(is.na(height_cm)~ ht_cm,
  #                             is.na(pl_ht_cm)~height_cm,
  #                             TRUE ~ pl_ht_cm)) %>%
  mutate(wilid = case_when(is.na(wilid) ~ willid,
                              is.na(willid)~ wilid,
                              TRUE ~ wilid))%>% 
  filter(!is.na(pl_ht_cm))

### select subset of columns: for UTILIZATION
# u19.sel <- u19 %>%
#   # glimpse()
#   select(c('date','yr','season', 'site','wilid','stid', 'live_status', 'scheme','height_cm','spp','len_dia'), contains("sht"),contains("x"))

# u19 %>% visdat::vis_miss()

### select subset of columns: for height
u19.ht <- u19 %>%
  # glimpse()
  select(c(file_name, sheet_name, yr, site, season, spp, plot, wilid, stid, live_status, pl_ht_cm))

# 'yr','season', 'spp', 'plot','site','wilid','stid', 'live_status', 'pl_ht_cm'))
# rename, reclass
u19.ht <- u19.ht %>% 
  # mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))

# clean
u19.ht <- u19.ht %>% 
  # filter(!(is.na(willid) & is.na(wilid))) %>% 
  distinct()

# more cleaning
u19.ht <- u19.ht %>% 
  filter(!is.na(pl_ht_cm)) %>%
  filter(wilid != "NA")

visdat::vis_miss(u19.ht)

##########
## calculate the per wilid height from population of stid
##########
u19.ht <- u19.ht %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  group_by(file_name, yr, wilid, site, spp, plot, season, live_status) %>% 
  summarise(pl_ht_cm = max(pl_ht_cm)) %>% 
  ungroup()

u19.ht <- u19.ht %>% 
  filter(!is.na(wilid))

u19.ht <- u19.ht %>%
  mutate_if(.predicate = is.character,.funs=tolower)

visdat::vis_miss(u19.ht)

u19.ht %>%
  visdat::vis_miss(cluster = TRUE) +
  # visdat::vis_guess() + 
  theme_classic() +
  labs(title = "2019 Spring Height", subtitle = "Missing map for qa/qc") +
  scale_fill_viridis(discrete = TRUE) +
  coord_flip()

# plotly::ggplotly()

2.11.1 Diagnostic plots on the 2018 and 2019

2.12 Combine and QC

2.12.1 Combine 18 and 19 spring data

#
u19.ht <- u19.ht %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm))

u18u19.ht <- bind_rows(u18.ht,u19.ht)

u18u19.ht %>% 
  mutate(site2 = paste0(site,"-",plot)) %>%
  ggTile_yr_season_site2(.) +
  labs(caption = "u18u19.ht")

u18u19.ht %>% 
  # filter(!is.na(site)) %>%
  # select(wilid) %>% 
  # datapasta::vector_paste() %>% 
  filter((wilid %in% c("415", "418", "449", "449", "452")) & is.na(site)) %>%
  # filter(wilid %in% c("415", "418", "449", "449", "452")) %>%
  distinct(wilid, site, yr) %>% 
  arrange(wilid)
## # A tibble: 4 x 3
##   wilid site     yr
##   <chr> <chr> <dbl>
## 1 415   <NA>   2019
## 2 418   <NA>   2019
## 3 449   <NA>   2019
## 4 452   <NA>   2019
# u18u19.ht <- u18u19.ht %>% 
#   filter(!is.na(site))
# missing data check: spring 2018 and 2019
# u18u19.ht %>%
#   mutate(yr = as.character(yr)) %>%
#   group_by(yr, season, site) %>%
#   summarise(n= n()) %>%
#   ggplot(aes(yr, site)) +
#   geom_tile(aes(fill = n), color = 'black') +
#   facet_wrap(~season)

#### Creating an export file to update out of R
# read in the most current version below this section
# create lu for exp trt
# wilid.lu <- w_ht %>%
#   distinct(site, wilid, treat, exp, dam, browse)

# plot.lu <- w_ht %>%
#   distinct(treat, exp, dam, browse) %>% 
#   mutate(plot = tolower(treat)) 

# plot.lu %>% 
#   write_csv("./data/plot_lookup_master.csv")

# wilid.lu.1 <- w_ht %>%
#   distinct(site, wilid, treat, exp, dam, browse)
 
# ## join with info
# wilid.lu.2update <- left_join(p1819.wilidht, wilid.lu) %>% 
#   distinct(site, wilid, treat, exp, dam, browse) %>% 
#   filter(is.na(treat)) %>% 
#   bind_rows(.,wilid.lu)
# 
## wilid.lu.2update %>% 
##   write_csv("./data/wilid_lookup_master.csv") # have manually updated this output, read in below
#############
####### read in the lu

# wilid.lu <- read_csv("./data/wilid_lookup_master.csv",col_types = "ccccccc")

# alt updated lu
wilid.lu <- read_csv("./data/wilid_lookup_master_v2.csv",col_types = "cccc")

wilid.lu <- wilid.lu %>% 
  distinct()

# change willid to wilid  
# make everything lowercase
# w_ht <- w_ht %>%
#   rename(wilid = willid) %>%  
#   mutate_if(.predicate = is.character,.funs=tolower)
## join the fall data with site info lu 
p1819.wilidht <- left_join(p1819.wilidht, wilid.lu)

# fix na for treat
p1819.wilidht <- p1819.wilidht %>%
  # distinct(site, treat) %>% View() # Ichecked and this does correctly convert site = na to obs
  mutate(treat = if_else(condition = is.na(treat),true = "obs", treat)) %>%
  mutate(site2 = paste0(site,"-",treat)) %>% 
  rename(plot = treat)
    
# plot lu
# p1819.wilidht <- left_join(p1819.wilidht, plot.lu, by = "plot")

p1819.wilidht <- p1819.wilidht %>% 
  mutate(season = "fall")
# u18u19.ht <- left_join(u18u19.ht, wilid.lu, by = "wilid") 

## adress missing sites
# u18u19.ht <- u18u19.ht %>% 
#   mutate(site = case_when(is.na(site.x) ~ site.y,
#                           # is.na(site.y) ~ site.x,
#                           TRUE ~ site.x)) %>%
#   select(-c(site.x, site.y)) %>%
#   filter(!is.na(site))

u18u19.ht <- u18u19.ht %>% 
  mutate(plot = if_else(condition = plot == "1",true = "obs","obs")) ## not sure why, but error with plot...

# plot lu
# u18u19.ht <- u18u19.ht %>% 
#   # select(-treat) %>% 
#   left_join(., plot.lu, by = "plot") %>%
#   filter(!is.na(plot)) 

# u18u19.ht %>% visdat::vis_miss()

# add site info to 18 19 spring measurements
# u18u19.ht <- left_join(u18u19.ht, plot.lu, by = "plot") 
# View(u18u19.ht)

# add plot info
# u18u19.ht <- u18u19.ht %>% 
#   left_join(., plot.lu, by = "plot")
### Combine spring and fall for 2018 and 2019
##
p1819.wilidht <- p1819.wilidht %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm))

u18u19.ht <- u18u19.ht %>% 
  filter(wilid != "")

p1819.wilidht <- p1819.wilidht %>%
  filter(live_status != "dead") %>% 
  filter(live_status != "missing")

ht.all18.19 <- bind_rows(p1819.wilidht,u18u19.ht)

## eliminate NA fpr plant ht
ht.all18.19 <- ht.all18.19 %>% 
  filter(!is.na(pl_ht_cm)) %>% 
  rename(species = spp) 
ht.all18.19 <- ht.all18.19 %>% 
  mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
                      wilid == "418" & is.na(site) ~ "ghole",
                      wilid == "449" & is.na(site) ~ "glen",
                      wilid == "452" & is.na(site) ~ "glen",
                      TRUE ~ site)) %>%
  mutate(site2 = paste0(site,"-", plot)) %>% 
  mutate(wilid_full = paste0(site2,"-", wilid))
## ### 2018 2019
# ### try to add spp info to the 18 and 19 
# wilid.spp.lu2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>% 
#   select(c(species, wilid_full))

## address sites without proper site attribution
ht.all18.19 <- ht.all18.19 %>%
  mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
                      wilid == "418" & is.na(site) ~ "ghole",
                      wilid == "449" & is.na(site) ~ "glen",
                      wilid == "452" & is.na(site) ~ "glen",
                      TRUE ~ site)) %>%
  select(-file_name) %>% 
  mutate(site = case_when(site == "lb41" ~ "lb4",
                          site == 'lost c' ~ "lostc",
                          site == "lost l" ~ "lostl",
                          TRUE ~ site)) %>% 
  mutate(site2 = paste0(site,"-", plot)) %>% 
  mutate(wilid_full = paste0(site2,"-", wilid))


ht.all18.19 %>% 
  visdat::vis_miss()

# ht.all18.19 <- ht.all18.19 %>% 
#   rename(plot = treat)
ht.all18.19 %>% 
  group_by(yr, season, site2) %>% 
  tally() %>% 
  ggplot(aes(as.character(yr), site2)) +
  geom_tile(aes(fill = n)) +
  facet_wrap(~season)

Missing data: Yancy fall 2018

ht.all18.19 %>%
  # filter(is.na(site))
  mutate(yr = as.character(yr)) %>% 
  group_by(yr, season, site2) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n), color = 'black') +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)

ht.all18.19 %>% 
  mutate(yr = as.character(yr)) %>% 
  ggTile_yr_season_site2() +
  labs(caption = "ht.all18.19")

2.13 Bring 16 17

2.13.1 Fall

## 16
p16.ht <- p16.ht %>% 
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(yr = as.character("2016")) %>% 
  mutate(season = "fall")

p16.ht %>% 
  tabyl(site2) %>% 
  gt() %>% 
  tab_header(title = "n distinct fall 2016 site2")
n distinct fall 2016 site2
site2 n percent
crescent-obs 5 0.012755102
eb1-cc 13 0.033163265
eb1-cx 11 0.028061224
eb1-dc 15 0.038265306
eb1-dx 16 0.040816327
eb1-obs 5 0.012755102
eb2-cc 10 0.025510204
eb2-cx 14 0.035714286
eb2-dc 20 0.051020408
eb2-dx 14 0.035714286
eb2-obs 5 0.012755102
elk-cc 17 0.043367347
elk-cx 18 0.045918367
elk-dc 15 0.038265306
elk-dx 22 0.056122449
elk1-obs 6 0.015306122
elk2-obs 8 0.020408163
elk3-obs 5 0.012755102
elk4-obs 5 0.012755102
elk5-obs 5 0.012755102
ghole-obs 10 0.025510204
halibuff-obs 5 0.012755102
lava-obs 5 0.012755102
lb1-obs 5 0.012755102
lb2-obs 8 0.020408163
lb3-obs 5 0.012755102
lostc-obs 5 0.012755102
lostl-obs 5 0.012755102
oxbow-obs 5 0.012755102
rose-obs 5 0.012755102
slide-obs 6 0.015306122
wb-cc 21 0.053571429
wb-cx 20 0.051020408
wb-dc 21 0.053571429
wb-dx 21 0.053571429
wb1-obs 5 0.012755102
wb2-obs 4 0.010204082
xtal-obs 2 0.005102041
yancy-obs 5 0.012755102
## corect site id data entry error
p17.ht <- p17.ht %>% 
  mutate(site = if_else(site == "wn","wb",site)) %>%   mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(yr = as.character("2017")) %>% 
  mutate(season = "fall")

####
p16.ht <- p16.ht %>%
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) 

p17.ht <- p17.ht %>% 
  select(-species)

p17.ht %>% 
  tabyl(site2, season) %>% 
  arrange(site2) %>% 
  gt() %>% 
  tab_header(title = "n distinct fall 2017 site2")
n distinct fall 2017 site2
site2 fall
crescent-obs 16
eb1-cc 34
eb1-cx 28
eb1-dc 53
eb1-dx 59
eb2-cc 18
eb2-cx 37
eb2-dx 45
elk-cc 45
elk-cx 77
elk-dc 37
elk-dx 70
elk-obs 9
elk1-obs 4
elk2-obs 13
elk3-obs 5
elk4-obs 15
elk5-obs 16
ghole-obs 18
glen-obs 16
halibuff-obs 6
lb-obs 53
lb1-obs 13
lb2-obs 22
lb3-obs 16
lostc-obs 17
lostl-obs 15
oxbow-obs 15
rose-obs 12
slide-obs 8
wb-cc 69
wb-cx 72
wb-dc 71
wb-dx 67
wb-obs 14
# p17.ht %>%
#   filter(wilid == "236") %>% filter(site2 == "wn-cx") ## corected

p16p17 <- bind_rows(p16.ht,p17.ht)

p16p17 %>% 
  visdat::vis_dat() +
  labs(caption = "p16p17")

# !!!
p16p17 <- p16p17 %>% 
  distinct() 

p16p17 %>% 
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 9)) +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season) +
  scale_fill_viridis(option = "C") +
  labs(caption = "Combined 2016 and 2017 fall height")

2.13.2 Spring

## spring 16 17
u16.ht <- u16.ht %>%
  mutate(season = "spring") %>% 
  mutate(yr = as.character("2016")) %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm))

u17.ht <- u17.ht %>% 
  mutate(season = "spring") %>% 
  mutate(yr = as.character("2017"))

## Lewis's other entry data
ht_lm_entry_17 <- ht_lm_entry_17 %>%
  select(-c(exp,dam,browse)) %>% 
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(wilid_full = paste0(site2,"-",wilid))

###
u17.ht %>% 
  distinct(site2) %>% 
  gt() %>% 
  tab_header(title = "distinct site")
distinct site
site2
crescent-obs
eb1-cc
eb1-cx
eb1-dc
eb1-dx
eb1-obs
eb2-cc
eb2-cx
eb2-dc
eb2-dx
eb2-obs
elk-cc
elk-cx
elk-dc
elk-dx
elk1-obs
elk2-obs
elk3-obs
elk4-obs
elk5-obs
ghole-obs
glen-obs
halibuff-obs
lava-obs
lb1-obs
lb2-obs
lb3-obs
lb4-obs
lostc-obs
lostl-obs
oxbow-obs
rose-obs
slide-obs
wb-cc
wb-cx
wb-dc
wb-dx
wb1-obs
wb2-obs
wb4-obs
yancy-obs
###
u17_lmentry <- u17.ht %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  bind_rows(., ht_lm_entry_17) %>% 
  # select(-species) %>% 
  # select(-live_dead) %>% 
  distinct()

# u17_lmentry %>%  distinct(wilid) %>% View()

2.13.2.1 bind rows p16, p17, all ht 18 19

# ht.all18.19 %>% distinct(wilid) %>% View()

ht.all18.19 <- ht.all18.19 %>% 
  mutate(wilid = case_when(wilid == "" & site == "lb5" ~ '1070'))

ht.all18.19 %>%
  filter(is.na(wilid))
## # A tibble: 1,290 x 10
##       yr site  species wilid live_status pl_ht_cm plot  site2  season wilid_full
##    <dbl> <chr> <chr>   <chr> <chr>          <dbl> <chr> <chr>  <chr>  <chr>     
##  1  2018 cresc geyer   <NA>  live             108 obs   cresc~ fall   cresc-obs~
##  2  2018 cresc pseudo  <NA>  live              62 obs   cresc~ fall   cresc-obs~
##  3  2018 eb1   beb     <NA>  live             204 dc    eb1-dc fall   eb1-dc-24 
##  4  2018 eb1   bebb    <NA>  live             295 dx    eb1-dx fall   eb1-dx-5  
##  5  2018 eb1   bebb    <NA>  live             368 dx    eb1-dx fall   eb1-dx-6  
##  6  2018 eb1   boothi  <NA>  live             164 dx    eb1-dx fall   eb1-dx-1  
##  7  2018 eb1   boothi  <NA>  live             252 dc    eb1-dc fall   eb1-dc-11 
##  8  2018 eb1   boothi  <NA>  live             252 dx    eb1-dx fall   eb1-dx-11 
##  9  2018 eb1   boothi  <NA>  live             206 dc    eb1-dc fall   eb1-dc-12 
## 10  2018 eb1   boothi  <NA>  live             206 dx    eb1-dx fall   eb1-dx-12 
## # ... with 1,280 more rows
#### bind rows p16, p17, all ht 18 19
p.16.17.18.19 <- ht.all18.19 %>%
  rename(live_dead = live_status) %>% 
  mutate(yr = as.character(yr)) %>% 
  select(-species) %>% 
  bind_rows(., p16p17) %>% 
  distinct()

p.16.17.18.19 %>% 
  visdat::vis_dat()

## add in the other lewis ht entry file
p.16.17.18.19 <- p.16.17.18.19 %>% 
  bind_rows(., u17_lmentry) %>%
  select(-live_dead) %>%
  distinct()

p.16.17.18.19 %>% 
  visdat::vis_dat()

# p.16.17.18.19 %>%  distinct(wilid) %>% View()

u18u19.ht <- u18u19.ht %>% 
  mutate(yr = as.character(yr))

ht.all.16.17.18.19 <- p.16.17.18.19 %>% 
  bind_rows(., u18u19.ht) %>% 
  select(-c(file_name,live_status)) %>% 
  distinct()
ht.all.16.17.18.19 %>% 
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 9)) +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season) +
  scale_fill_viridis(option = "C") +
  labs(caption = "Combined 2016 and 2017 fall height")

ht.all.16.17.18.19 <- ht.all.16.17.18.19 %>%
  mutate(site = case_when(site == "halibuff" ~ "hailbuff",
                          site == "cresc" ~ 'crescent',
                          site == "crescent" ~ "crescent",
                          site == "lost l" ~ "lostl",
                          site == "lost c" ~ "lostc",
                          site == "xtal" ~ "crystal",
                          TRUE ~ site))
  
ht.all.16.17.18.19 <- ht.all.16.17.18.19 %>% 
  mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
                      wilid == "418" & is.na(site) ~ "ghole",
                      wilid == "449" & is.na(site) ~ "glen",
                      wilid == "452" & is.na(site) ~ "glen",
                      TRUE ~ site)) %>%
  mutate(site2 = paste0(site,"-", plot)) %>% 
  mutate(wilid_full = paste0(site2,"-", wilid))

## bind in the dcc from raw files
dcc.ht.fa.spr <- dcc.ht.fa.spr %>% 
  mutate(yr = as.character(yr))

dcc.ht.fa.spr %>% 
  visdat::vis_dat()

dcc.16.17.18.19 <- ht.all.16.17.18.19 %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  mutate(yr = as.character(yr)) %>% 
  # select(-c(browse, exp, dam)) %>% 
  bind_rows(., dcc.ht.fa.spr) %>% 
  mutate(site2 = paste0(site,"-",plot))

# dcc.16.17.18.19 <- dcc.16.17.18.19 %>% 
#   left_join(.,wilid.spp.lu2) 

dcc.16.17.18.19 <- dcc.16.17.18.19 %>%
  select(-c(spp, species)) %>% 
  distinct() 

dcc.16.17.18.19 %>% 
  visdat::vis_miss()

dcc.16.17.18.19 %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 9)) +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)

dcc.16.17.18.19 <- dcc.16.17.18.19 %>% 
  mutate(yr = as.character(yr))


## bind in other dcc object
dcc.16.17.18.19 <- ht.all18.19 %>% 
  mutate(yr = as.character(yr)) %>% 
  bind_rows(.,dcc.16.17.18.19) %>%
  select(-c(species, live_status)) %>% 
  distinct()

### select
dcc.16.17.18.19 <- dcc.16.17.18.19 %>% 
  select(c(yr, season, site, site2, wilid, wilid_full, pl_ht_cm, plot)) 

##
w_ht <- w_ht %>%
  # rename(plot = treat) %>%
  rename(yr = year) %>%   
  mutate(yr = as.character(yr)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-", wilid))

w_ht <- w_ht %>% 
  select(c(yr, season, site, site2, wilid, wilid_full, pl_ht_cm, plot))%>% 
  mutate(wilid = as.character(wilid)) 

## join in spp
# w_ht <- left_join(w_ht, wilid.spp.lu2)

# FALL 18 and 19 plus DCC
ht.combined <- dcc.16.17.18.19 %>% 
  mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>% 
  bind_rows(., w_ht) %>% 
  distinct()

## clean
ht.combined <- ht.combined %>% 
  mutate(site = case_when(site == "halibuff" ~ "hailbuff",
                          TRUE ~ site)) %>% 
  distinct()

# 
# ht.combined %>%
#   visdat::vis_miss()
ht.combined %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season) +
  labs(caption = "Combined DCC, 16-19")

dcc.ht.fa.spr %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season) +
  labs(title = "DCC data")

ht.combined <- ht.combined %>%
  filter(pl_ht_cm < 700)

# ht.combined %>% 
#   distinct(site) %>% 
#   View()


ht.combined %>% 
  group_by(site2, season, yr) %>% 
  tally() %>% 
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n)) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season) +
  labs(caption = "Combined DCC, 16-19")

### the missing spring dcc data
# note: this is supplanted by code that pulls in 
# both fa and spr data from raw dcc files

# nsf.ht.spr <- nsf.ht.spr %>%
#   mutate(yr = as.character(yr), wilid = as.character(wilid)) %>%
#   mutate(season = "spring") %>%
#   mutate(site2 = paste0(site,"-",plot))
# nsf.ht.spr <- left_join(nsf.ht.spr, plot.lu, by = "plot")
# nsf.ht.spr %>% filter(is.na(species))

nsf.ht.fa.expobs <- nsf.ht.fa.expobs %>% 
  mutate(yr = as.character(yr)) %>% 
  mutate(wilid = as.character(wilid)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"wilid")) %>% 
  mutate(site2 = paste0(site,"-",plot))
# ht.combined <- bind_rows(ht.combined, nsf.ht spring and fall) 
ht.combined <- bind_rows(ht.combined, nsf.ht.fa.expobs) %>% 
  distinct()


## use species lookup table to 
# 
# ht.combined %>% 
#   distinct(species) %>% 
#   write_csv("./data/lu_species_code.csv")
  
# clean
ht.combined <- ht.combined %>%
  # mutate(species = case_when(species == 'beb' ~ "bebb",
  #                            species == 'drumm' ~ "drum",
  #                            species == 'boothii' ~ "booth",
  #                            species == 'boothi' ~ "booth",
  #                            species == 'lassiandra' ~ "lass",
  #                            TRUE ~ species)) %>% 
  # filter(species != "?") %>%
  mutate(plot = if_else(is.na(plot),"obs",plot)) %>%   # filter(!(wilid == "570" & pl_ht_cm == 283)) %>% 
  filter(!is.na(site)) %>% 
  mutate(site = if_else(site == "crescent","cresc", site)) %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid))

# visdat::vis_miss(ht.combined)

ht.combined %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  facet_wrap(~season)+
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)

# ## find missing data
ht.combined %>%
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)

# ggsave("./output/output4qaqc/height_heatmap20191204 1937.pdf", width = 11, height = 8.5)

# w_ht %>%
#   group_by(yr, season, site) %>%
#   summarise(n= n()) %>%
#   ggplot(aes(yr, site)) +
#   geom_tile(aes(fill = n), color = 'black') +
#   facet_wrap(~season)
#clean sites
ht.combined <- ht.combined %>% 
  mutate(site = case_when(site == "lb41" ~ "lb4",
                          site == 'lost c' ~ "lostc",
                          site == "lost l" ~ "lostl",
                          TRUE ~ site)) %>% 
  mutate(site = case_when(site == "halibuff" ~ "hailbuff",
                          site == "cresc" ~ 'crescent',
                          site == "crescent" ~ "crescent",
                          site == "lost l" ~ "lostl",
                          site == "lost c" ~ "lostc",
                          site == "xtal" ~ "crystal",
                          TRUE ~ site))

# ht.combined <- ht.combined %>% 
#   mutate(pl_ht_cm = round(pl_ht_cm,0))

ht.combined <- ht.combined %>% 
  # mutate(wilid = as.numeric(round(pl_ht_cm,0))) %>%
  mutate(wilid = as.character(wilid))

ht.combined <- ht.combined %>% 
  filter(wilid != "0") %>% 
  distinct() 

ht.combined <- ht.combined %>% 
  mutate(site = case_when(site == "hailbuff"~ "hailbuf",
                          TRUE ~ site))

ht.combined %>% 
  tabyl(site) %>% 
  datatable()
#
ht.combined %>%
  group_by(yr, season, site2) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n), color = 'black') +
  facet_wrap(~season) +
  theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
  theme(axis.text.y = element_text(size = 7)) +
  facet_wrap(~season)

#### lu merge
# dcc.wilid.spp.lu
# wilid.spp.lu <- wilid.spp.lu %>% 
#   mutate(wilid_full = paste0(site, "-", plot, "-", wilid)) 

## create a lu with dcc raw and v1 wilid spp list
# wilid.spp.lu.2 <- bind_rows(wilid.spp.lu, dcc.wilid.spp.lu) %>% 
#   distinct()

# write_csv(wilid.spp.lu.2, "./data/wilid_spp._lu_v2.csv")

2.13.3 site problems 18 19 spring

## attempt to get the 18 19 data back

ht.combined  %>%  visdat::vis_dat()

####### 
sp1819 <- u18u19.ht %>% 
  # select(-c('file_name','exp',"dam","browse")) %>% 
  rename(species = spp) %>% 
  mutate(yr = as.character(yr)) %>%
  # distinct(live_status)
  filter(live_status == "live" | live_status == "live" | is.na(live_status)) %>% 
  select(-c(species,file_name))

## bind in the spring1819
ht.combined <- ht.combined %>% 
  bind_rows(., sp1819) 

ht.combined <- ht.combined %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(wilid_full = paste0(site2,"-",wilid))

ht.combined %>% 
  visdat::vis_dat()

ht.combined %>% 
  # filter(plot != "obs") %>% 
  group_by(yr, season, site2) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = n), color = 'black') +
  facet_wrap(~season)

# # type convert
# ht.combined.fin2 <- ht.combined.fin2 %>% 
#   mutate(exp = as.character(exp), dam = as.character(dam), browse = as.character(browse))


ht.combined <- ht.combined %>%
  # filter(season == "spring") %>% 
  mutate(site_wilid = paste0(site,"-",wilid)) %>% 
  mutate(site = case_when(site == "lb41" ~ "lb4",
                          site == 'lost c' ~ "lostc",
                          site == "lost l" ~ "lostl",
                          TRUE ~ site))

ht.combined <- ht.combined %>% 
  select(yr, site, wilid, season, plot, pl_ht_cm, site2, wilid_full) %>%
  distinct()
  
ht.combined %>% 
  filter(is.na(site))
## # A tibble: 3 x 8
##   yr    site  wilid season plot  pl_ht_cm site2  wilid_full
##   <chr> <chr> <chr> <chr>  <chr>    <dbl> <chr>  <chr>     
## 1 2019  <NA>  415   spring obs        174 NA-obs NA-obs-415
## 2 2019  <NA>  418   spring obs        116 NA-obs NA-obs-418
## 3 2019  <NA>  449   spring obs         97 NA-obs NA-obs-449
# ht.combined %>% 
#   distinct(site) %>% View()

# ht.combined <- ht.combined %>%
#   # filter(season == "spring") %>% 
#   mutate(site = case_when(site == "lb41" ~ "lb4",
#                           site == 'lost c' ~ "lostc",
#                           site == "lost l" ~ "lostl",
#                           TRUE ~ site)) %>% 
#   select(yr, site, wilid, season, site, plot, pl_ht_cm, species)
  
# ht.combined.f3 <- bind_rows(ht.combined.fin2,ht.combined.final) %>% 
#   distinct()
# filter(plot != "obs") %>%
ht.combined %>% 
  # filter(plot == "obs") %>% 
  group_by(yr, season, site) %>%
  summarise(n= n()) %>%
  ggplot(aes(yr, site)) +
  geom_tile(aes(fill = n), color = 'black') +
  # facet_grid(season~yr) +
  facet_wrap(~season)

ht.combined %>%
  distinct(site, wilid, plot) %>% 
  arrange(wilid, site) %>% 
  datatable(filter = "top")
ht.combined <- 
  ht.combined %>% 
  filter(!is.na(wilid))
  # filter(plot == 'obs' & site == "eb2") %>%
  # filter(plot == 'obs' & site == "eb2")

ht.combined <- ht.combined %>% 
  mutate(plot = if_else(is.na(plot), "obs", plot)) 
         
ht.combined %>% visdat::vis_dat()

## more site name cleanup
ht.combined <- ht.combined %>% 
  mutate(site = case_when(site == "cresc" ~ "crescent",
                          site == "hailbuff" ~ "hailbuf",
                          TRUE ~ site))

## check site
# ht.combined.f3 %>%
#   distinct(site) %>%  View()

## Create site2
ht.combined <- ht.combined %>% 
  mutate(site2 = paste0(site,"-",plot))

# ht.combined.f3 <- ht.combined.f3 %>% 
#   filter(!is.na(site))

2.13.4 Data checks on combined fall and spring datasets, all years

2.13.4.1 Issues Tom found regarding wb out of range data

## Fa raw DCC. Note error in wilid=158, 2010 
 read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGfall2002-2015.txt", delim = " ") %>% 
  select(year, site, plot, species, willid, height) %>% 
  rename(yr = year, wilid = willid, pl_ht_cm = height) %>% 
  mutate(season = "fall") %>% 
  filter(wilid == "158")
## # A tibble: 13 x 7
##       yr site  plot  species wilid pl_ht_cm season
##    <dbl> <chr> <chr> <chr>   <dbl>    <dbl> <chr> 
##  1  2002 wb    dx    beb       158      127 fall  
##  2  2003 wb    dx    beb       158      160 fall  
##  3  2004 wb    dx    beb       158      180 fall  
##  4  2005 wb    dx    beb       158      219 fall  
##  5  2006 wb    dx    beb       158      240 fall  
##  6  2007 wb    dx    beb       158      280 fall  
##  7  2008 wb    dx    beb       158      298 fall  
##  8  2009 wb    dx    beb       158      327 fall  
##  9  2010 wb    dx    beb       158     1362 fall  
## 10  2012 wb    dx    beb       158      382 fall  
## 11  2013 wb    dx    beb       158      136 fall  
## 12  2014 wb    dx    beb       158      364 fall  
## 13  2015 wb    dx    beb       158      380 fall
# check specific records Tom identified

ht.combined %>%
  distinct() %>% 
  filter (yr == '2010' & site == "wb") %>%
  # datatable()
  filter(wilid == "158")
## # A tibble: 3 x 8
##   yr    site  wilid season plot  pl_ht_cm site2 wilid_full
##   <chr> <chr> <chr> <chr>  <chr>    <dbl> <chr> <chr>     
## 1 2010  wb    158   spring dx         338 wb-dx wb-dx-158 
## 2 2010  wb    158   fall   dx         362 wb-dx wb-dx-158 
## 3 2010  wb    158   fall   dx        1362 wb-dx wb-dx-158

Two issues here, now fixed: 1.plant height of 1362 would appear to be a data entry error in the raw file. This is 2010 data drawn from the DCC (library repository), so I don’t have the raw data to check. The value is in the raw file, not an artifact introduced in the data processing. I’ve changed it to “362”.

ht.combined %>%
  distinct() %>% 
  filter (yr == '2010' & site == "wb") %>%
  # datatable()
  filter(wilid == "158") %>% 
  datatable(caption = "questionable records flagged by TH")
## correct the height
ht.combined <- ht.combined %>%
  mutate(pl_ht_cm = if_else(pl_ht_cm == 1362, 362, pl_ht_cm)) %>% 
  mutate(pl_ht_cm = if_else(pl_ht_cm == 2877, 287, pl_ht_cm))

# these should be corrected in raw files?

1.Notice the different species: booth vs boothi, beb vs bebb. I ran code to find and replace such issues, but had it too early in the chain of transformations involved in the import and merging of the raw data. I have steps to remove duplicates, but b/c of the different species, they’re not the same.

## add full id to join with species
# wilid and site are not unique
ht.combined <- ht.combined %>% 
  mutate(wilid_full = paste0(site2,"-",wilid)) %>% 
  filter(!is.na(pl_ht_cm))
## cleaning

ht.combined <- ht.combined %>%
  mutate(wilid_full = paste0(site2,"-",wilid))
  
# Eliminate species codes, find distinct, then join in the species from LU

ht.combined <- ht.combined %>%
  distinct(yr, site, wilid, season, plot, pl_ht_cm, site2, wilid_full)

ht.combined <- ht.combined %>% 
  left_join(.,lu_spp_site2, by = "wilid_full") 

ht.combined %>% 
  filter(is.na(species)) %>% 
  select(yr,season,wilid, wilid_full, pl_ht_cm) %>% 
  datatable(caption = "wilid_full lacking species. Manually check and update lookup table")
# manually correct missing sites in raw files and
# regenerate the full_id
ht.combined <- ht.combined %>%
  mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
                      wilid == "418" & is.na(site) ~ "ghole",
                      wilid == "449" & is.na(site) ~ "glen",
                      wilid == "452" & is.na(site) ~ "glen",
                      TRUE ~ site)) %>%
  mutate(site2 = paste0(site,"-", plot)) %>% 
  mutate(wilid_full = paste0(site2,"-", wilid))

## still issues with wilid decimal points
ht.combined <-  ht.combined %>% 
  mutate(wilid = str_remove(.$wilid, ".0")) 

## !!  
ht.combined <- ht.combined %>% 
  mutate(wilid = as.numeric(wilid)) %>% 
  filter(!is.na(wilid)) %>% 
  filter(wilid != "0") %>%
  mutate(wilid_full = paste0(site,"-",plot,"-",wilid)) %>% 
  select(-species) %>% 
  distinct()

# problem records: plot
ht.combined <- ht.combined %>% 
  mutate(plot  = case_when(site == "crescent" & plot == "cc" ~ "obs",
                           TRUE ~ plot)) %>%
  mutate(site2 = paste0(site,"-",plot)) %>% 
  mutate(wilid_full = paste0(site2,"-",wilid))
  # mutate(plot = case_when(wilid_full == "crescent-cc-86" ~ "obs",
  #           TRUE ~ plot)) %>% 
  # filter(wilid_full == "crescent-cc-86") # was a problem

ht.combined %>% visdat::vis_dat()

ht.combined %>% 
  ggTile_yr_season_site2() +
  labs(caption = "ht.combined")

## still missing species. Export records
# ht.combined %>%
#   left_join(.,lu_spp_site2, by = "wilid_full") %>%
#   filter(is.na(species)) %>%
#   select(yr,season,wilid, wilid_full, pl_ht_cm) %>%
#   distinct(wilid_full) %>%
#   write_csv("./data/lu_spp_full_wilid_MANUALLY_figureout_and_add_wilid_full.csv")

2.13.5 QA QC checks on combined data

ht.combined %>%
  distinct(wilid) %>% 
  datatable(caption = "distinct wilid")
ht.combined %>%
  distinct(wilid_full) %>% 
  datatable(caption = "distinct wilid_full")
ht.combined %>%
  distinct(site) %>% 
  datatable(caption = "distinct site2")
##
ggTile_yr_season_site2(df = ht.combined)

2.13.6 Add beaver occupancy info from lookup

## create site2 beaver occ lu
### export site 2 lu
# ht.combined.f3 %>% 
#   distinct(site, plot, site2) %>% 
#   write_csv("./data/lu_site_site2_beav.csv")
## manually update with LM input
# Beaver currently occupy crystal (2015?-2019) and wb4 (2017-2019).
# 
# Lb4, lb5, and lb6 (2016/2017-2018), along with elk4 and elk5 (not sure of dates but not currently occupied). 

# lu_site_beav <- read_csv("./data/lu_site_site2_beav.csv")

2.13.7 species and plot lu join

## plt join

# ## create lu for full id and spp
# ht.combined %>%
#   left_join(., lu_wilid_full_spp) %>%
#   distinct(wilid_full, species) %>%
#   write_csv("./data/lu_wilid_full_spp.csv")

# !!!
## read in the updated 
# lu_wilid_full_spp <- read_csv("./data/lu_wilid_full_spp.csv")  

# ht.combined %>% 
#   visdat::vis_dat()

ht.combined <- ht.combined %>% 
  left_join(.,lu_wilid_full_spp) 
  
ht.combined <- ht.combined %>% 
  left_join(.,plot.lu) %>% 
  select(-treat)

## join bv beav
ht.combined <- ht.combined %>% 
  left_join(., lu_site_beav) 

ht.combined %>% 
  group_by(yr, site, plot) %>% 
  tally() %>% 
  ggplot(aes(yr,site)) +
  geom_tile(aes(fill = n)) +
  facet_wrap(~plot)

ht.combined %>% 
  # mutate(wilid = str_remove(.$wilid, ".0"))
  filter(is.na(species))
## # A tibble: 2,096 x 18
##    yr    site  wilid season plot  pl_ht_cm site2 wilid_full species spp_notes
##    <chr> <chr> <dbl> <chr>  <chr>    <dbl> <chr> <chr>      <chr>   <lgl>    
##  1 2016  wb2       1 fall   obs        239 wb2-~ wb2-obs-1  <NA>    NA       
##  2 2016  wb        2 fall   cx         131 wb-cx wb-cx-2    <NA>    NA       
##  3 2016  wb        2 fall   cx         376 wb-cx wb-cx-2    <NA>    NA       
##  4 2016  wb        7 fall   cx         232 wb-cx wb-cx-7    <NA>    NA       
##  5 2016  wb        1 fall   dc         254 wb-dc wb-dc-1    <NA>    NA       
##  6 2016  wb        1 fall   cc         190 wb-cc wb-cc-1    <NA>    NA       
##  7 2016  wb        4 fall   cc         108 wb-cc wb-cc-4    <NA>    NA       
##  8 2016  wb        7 fall   cc         172 wb-cc wb-cc-7    <NA>    NA       
##  9 2016  wb        5 fall   cc          70 wb-cc wb-cc-5    <NA>    NA       
## 10 2016  wb        3 fall   dc         150 wb-dc wb-dc-3    <NA>    NA       
## # ... with 2,086 more rows, and 8 more variables: exp <chr>, dam <chr>,
## #   browse <chr>, beav19 <chr>, beav18 <chr>, beav17 <chr>, beav16 <chr>,
## #   beav15 <chr>
ht.combined %>% visdat::vis_dat()

2.14 Export

ht.combined %>%
  write_csv("./output/processed_data/ht_combined20191219_0126.csv")

2.15 Plots

## summarize by site2 and year using skimr
summary.by.site2 <- ht.combined %>% 
  # filter(wilid == "924") %>% 
  filter(!is.na(site)) %>% 
  mutate(year = as.factor(yr)) %>% 
  group_by(site, plot, site2,yr) %>%
  nest() %>% 
  mutate(skim2 = map(data,skim)) %>% 
  unnest(skim2) %>% 
  ungroup()
## line plot
summary.by.site2 %>%
  mutate(yr = as.numeric(yr)) %>% 
  filter(skim_variable == 'pl_ht_cm') %>%
  filter(plot != "obs") %>% 
  # filter(!(site2 == "elk-dx" & yr == 2019)) %>% 
  ggplot(aes(yr)) +
  # geom_ribbon(aes(ymin = numeric.p25, ymax = numeric.p75, fill = treat), alpha = 0.21) +
  geom_line(aes(y=numeric.p50, color = plot)) +
  geom_point(aes(y=numeric.p50, color = plot)) +
  geom_pointrange(aes(y = numeric.p50, ymin = numeric.p25, ymax = numeric.p75, color = plot), alpha = 0.31) +
  facet_wrap(~site) +
  theme_minimal() +
  labs(y="Median willow height (cm)", x = "Year", caption = "Willow height at experimental sites")

# ggsave("./output/median_willow_height_20191205_0924.png", width = 7, height = 5.5)
## working on this...
# summary.by.site2 %>%
#   filter(contains(match = ""))
#   filter(stat == "n_unique") %>% 
#   ggplot(aes(variable,value)) +
#   geom_point() +
#   theme(axis.text.x = element_text(angle=45, hjust=1, face="bold", color="black", size=10)) +
#   facet_wrap(~site2, ncol=4)
##
summary.by.site2 %>%
  select(yr, plot, site2, skim_variable, contains("numeric")) %>% 
  filter(!(site2 == "elk-dx" & yr == 2019)) %>% 
  filter(skim_variable == "pl_ht_cm") %>% 
  filter(plot != "obs") %>%  
  ggplot(aes(yr, site2)) +
  geom_tile(aes(fill = numeric.mean), color = 'gray70') +
  scale_fill_viridis() +
  theme_light() +
  labs(x = "Year", y = "", subtitle = "Mean willow height", fill = "Height (cm)", caption = getwd())

# ggsave("./output/median_willow_height_HM_20191205_0924.png", width = 7, height = 7)

summary.by.site2 %>%
  select(yr, plot, site2, skim_variable, contains("numeric")) %>% 
  filter(!(site2 == "elk-dx" & yr == 2019)) %>% 
  filter(skim_variable == "pl_ht_cm") %>% 
  filter(plot != "obs") %>%  
  ggplot(aes(yr, plot)) +
  geom_tile(aes(fill = numeric.mean)) +
  scale_fill_viridis() +
  theme_light() +
  facet_wrap(~site2) +
  labs(x = "Year", y = "", subtitle = "Mean willow height", fill = "Height (cm)", caption = getwd())

# ggsave("./output/median_willow_height_HM_20191205_0924.png", width = 7, height = 7)

2.15.0.1 Species lu

## create species lookup
# ht.combined %>% 
#   mutate(species = case_when(species == 'beb' ~ "bebb",
#                              species == 'drumm' ~ "drum",
#                              species == 'boothii' ~ "booth",
#                              species == 'boothi' ~ "booth",
#                              species == 'lassiandra' ~ "lass",
#                              species == 'plantifolia' ~ "plan",
#                              species == 'planifolia' ~ "plan",
#                              
#                              TRUE ~ species)) %>% 
#   # filter(is.na(species))
#   # filter(species == "na")
#   distinct(site2, wilid, species) %>% 
#   filter(!is.na(species)) %>%
#   filter(species != "na") %>%
#   mutate(wilid_full = paste0(site2,"-",wilid)) %>%
#   select(wilid_full, species) %>% 
#   write_csv("./data/lu_spp_site2.csv")

3 Notes

The metadata csv included in the DCC repository does not include all the variables.

In the DCC data, “browse = 0” indicates experimental exclosure treatment. For example, see willid 1 in EB1. The plot type is listed in the 2018 dataset as “dx”. In the 2001-2017 data, it’s dam == 1, browse == 0

In the supplemental material accompanying the 2013 Marshall et al. ProcRoySoc paper, the willid are attributed with “dammed” and “UNbrowsed”. This is confusing, since for “dam”, 1 is the experimental treatment, but for “browse”, 1 is the control.

4 Sandbox

#### old
## Import 2018 Data
prod.path <- "data/raw/production/Exp_2018_Production_Height_20181220.xlsx"

# the following seems to work for taking all of the tabs, importing them into a single tibble
# B/c of parsing errors, everything is brought in as char, so need to type-fix.
prod18.df <- prod.path %>% 
  excel_sheets() %>% 
  set_names() %>%
  map(read_excel, path = prod.path, skip = 1, col_types = 'text') %>% 
  map(clean_names) %>% 
  map_df(.f = bind_rows, .id = "FNAME")

# I could theoretically type set on import through the 'col_types' arg, BUT some of the tabs have 126 columns, some 127... Not sure which columns differ. A different puzzle to solve at some point :)
## plot the count of observations
w_ht01_17 %>%
  mutate(year = as.factor(year)) %>% 
  group_by(season, year) %>% 
  summarise(cnt = n()) %>% 
  ggplot(aes(year, cnt)) + 
  geom_bar(stat = 'identity', aes(fill = season), position = "dodge", color = "black") +
  theme_bw()
# There's only two years (01,17) with Spring measurements...