Updated: 19 December, 2019
This document evaluates willow height measurement data from project inception through 2019. Data include those archived on the “Digital Collections of Colorado” (DCC) library site and more recent (2015-2019) data. These analyses are aimed at identifying potential issues such as:
DCC links:
Plant level fall current annual growth and height on experimental plots in Yellowstone’s Northern Range, 2002 - 2015: http://hdl.handle.net/10217/173649
Plant level fall current annual growth and height on observation plots in Yellowstone’s Northern Range, 2008 - 2015: http://hdl.handle.net/10217/173657
Associated Publications: Marshall Kristin N., N. Thomson Hobbs and David J. Cooper. Stream Hydrology Limits Recovery of Riparian Ecosystems After Wolf Reintroduction. Proceedings of the Royal Society. B 280, issue 1756 (April 7, 2013): 20122977. http://dx.doi.org/10.1098/rspb.2012.2977
Marshall, Kristin N., David J. Cooper, N. Thompson Hobbs. Interactions Among Herbivory, Climate, Topography and Plant Age Shape Riparian Willow Dynamics in Northern Yellowstone National Park, USA. Journal of Ecology 102, issue 3 (May 2014): 667-677. http://dx.doi.org/10.1111/1365-2745.12225
##### Function to read in sheets and tabs
read_sheets <- function(file){
xlsx_file <- file
xlsx_file %>%
excel_sheets() %>%
set_names() %>%
map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46") %>%
mutate(file_name = file) %>%
select(file_name, sheet_name, everything())
}
##
read_sheets_prod <- function(file){
xlsx_file <- file
xlsx_file %>%
excel_sheets() %>%
set_names() %>%
map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:Dw136") %>%
mutate(file_name = file) %>%
select(file_name, sheet_name, everything())
}
read_sheets_util <- function(file){
xlsx_file <- file
xlsx_file %>%
excel_sheets() %>%
set_names() %>%
map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DQ170") %>%
mutate(file_name = file) %>%
select(file_name, sheet_name, everything())
}
## read xlsx then csv cache
read_then_csv <- function(sheet, path) {
pathbase <- path %>%
basename() %>%
tools::file_path_sans_ext()
path %>%
read_excel(sheet = sheet) %>%
write_csv(paste0(pathbase, "-", sheet, ".csv"))
}
## plotting
ggTile_yr_season_site2 <- function(df){
df %>%
group_by(yr, season, site2) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n), color = 'white') +
viridis::scale_fill_viridis(option = "B") +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)}
ggTile_yr_season_site <- function(df){
df %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'white') +
viridis::scale_fill_viridis(option = "D") +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)}
## wilid_full spp lookup
lu_wilid_full_spp <- read_csv("./data/lu_wilid_full_spp.csv")
## bv occ
lu_site_beav <- read_csv("./data/lu_site_site2_beav.csv")
### try to add spp info to the 18 and 19
wilid.spp.lu2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>%
select(c(species, wilid_full)) %>%
distinct()
## change ref system to start with lu
lu.wilid.spp.v2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>%
select(c(species, wilid_full)) %>%
distinct()
plot.lu <- read_csv("./data/plot_lookup_master.csv",col_types = "ccccc")
## read in lu
lu_spp_site2 <- read_csv("./data/lu_spp_site2.csv")
## revised 20180228
# w_ht <- read_excel("data/Select_files/willow_height/WillowHt_2001_2017_Final.xlsx")
# dataframe comparisons
# setdiff(bigFrame, smallFrame) # dplyr solution for comparison
# comp <- setdiff(w_ht,w_ht2)
# Using functions in "comparison" package
# comparison <- compare(w_ht,w_ht2,allowAll=TRUE)
## Continuing with the file downloaded from the GDr 20180321
w_ht <- read_excel("./data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.xlsx")
w_ht %>%
# distinct(season)
mutate(year = as.character(year)) %>%
group_by(site, season, year) %>%
tally() %>%
ggplot(aes(year, site)) +
geom_tile(aes(fill = n)) +
facet_wrap(~season) +
labs(caption = "data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.xlsx") +
theme(axis.text.x = element_text(angle = 55, hjust = 1))
w_ht01_17 <- read_excel("data/raw/ht/WillowHt_2001_2017_QAQCd_Version.xlsx")
w_ht <- bind_rows(w_ht, w_ht01_17) %>%
# select(-c(ID, exp, dam, browse)) %>%
filter(plantht != "NA") %>%
rename(pl_ht_cm = plantht) %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
distinct()
### these files have no species id
Interogating other raw files looking for spring data
w_ht01_17 %>%
group_by(year, season, site) %>%
tally() %>%
ggplot(aes(as.character(year), season)) +
geom_tile(aes(fill = n)) +
labs(caption = "WillowHt_2001_2017_QAQCd_Version.xlsx")
> Still missing spring data…
## MORE SPRING DATA?!
## the experimental
nsf.ht.spr <- read_delim("./data/NSF_DataArchive20180208/Plant_level_spring_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGspring2003-2015.txt", delim = " ")
nsf.ht.spr.obs <- read_delim("./data/NSF_DataArchive20180208/Plant_level_spring_current_annual_growth_and_height_on_observation_plot/OBS-plantCAGspring2009-2013.txt", delim = " ", col_types = "cccccddcccc")
# clean. rename
nsf.ht.spr <- nsf.ht.spr %>%
select(year, site, plot, species, willid, plantht) %>%
rename(yr = year, wilid = willid, pl_ht_cm = plantht) %>%
mutate(season = "spring")
nsf.ht.spr.obs <- nsf.ht.spr.obs %>%
select(year, site, plot, species, willid, plantht) %>%
rename(yr = year, wilid = willid, pl_ht_cm = plantht)
# clean. na
nsf.ht.spr <- nsf.ht.spr %>%
filter(!is.na(pl_ht_cm)) %>%
mutate(season = "spring") %>%
mutate(yr = as.character(yr)) %>%
mutate(wilid = as.character(wilid))
nsf.ht.spr.obs <- nsf.ht.spr.obs %>%
filter(!is.na(pl_ht_cm)) %>%
mutate(season = "spring") %>%
mutate(plot = "obs")
## combine obs and exp for spring
nsf.ht.spr <- bind_rows(nsf.ht.spr,nsf.ht.spr.obs)
# clean
nsf.ht.spr <- nsf.ht.spr %>%
mutate(plot = if_else(is.na(site),"obs",plot)) %>%
filter(!is.na(plot))
nsf.ht.spr %>%
# distinct(site)
visdat::vis_miss() +
labs(caption = "nsf.ht.spr")
nsf.ht.spr %>%
mutate(site2 = paste0(site,"-",plot)) %>%
ggTile_yr_season_site2() +
labs(caption = "nsf.ht.spr")
nsf.ht.spr %>%
mutate(site2 = paste0(site,"-",plot)) %>%
group_by(site2, yr) %>%
tally() %>%
ungroup() %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n)) +
labs(title = "combined obs and exp files from DCC")
## Fa raw DCC
nsf.ht.fa.exp <- read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGfall2002-2015.txt", delim = " ") %>%
select(year, site, plot, species, willid, height) %>%
rename(yr = year, wilid = willid, pl_ht_cm = height) %>%
mutate(season = "fall")
nsf.ht.fa.obs <- read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_observation_plot/OBS_shootCAGfall08-15.txt", delim = " ") %>%
select(year, site, plot, species, willid, height) %>%
rename(yr = year, wilid = willid, pl_ht_cm = height) %>%
mutate(season = "fall")
## combine raw dcc exp and obs
nsf.ht.fa.expobs <- bind_rows(nsf.ht.fa.exp, nsf.ht.fa.obs) %>%
distinct() %>%
mutate(yr = as.character(yr), wilid = as.character(wilid))
## dcc combined
dcc.ht.fa.spr <- bind_rows(nsf.ht.fa.expobs, nsf.ht.spr) %>%
mutate(wilid = as.character(wilid)) %>%
distinct()
dcc.ht.fa.spr %>%
mutate(site2 = paste0(site,"-",plot)) %>%
ggTile_yr_season_site2() +
labs(caption = "dcc.ht.fa.spr")
# clean. na
dcc.ht.fa.spr <- dcc.ht.fa.spr %>%
filter(!is.na(pl_ht_cm))
##
dcc.ht.fa.spr <- dcc.ht.fa.spr %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
mutate(wilid_full = paste0(site,"-",plot, "-", wilid))
dcc.ht.fa.spr %>%
visdat::vis_miss()
dcc.ht.fa.spr %>%
visdat::vis_guess()
# dcc.ht.fa.spr %>%
# # filter(is.na(species)) %>%
# filter(yr != "910" & yr != "189" & yr !='36415' & yr != "355" & yr != "379") %>%
# distinct(yr, season) %>%
# gt()
## lu export
# dcc.wilid.spp.lu <- dcc.ht.fa.spr %>%
# distinct(site, plot, species, wilid, wilid_full)
#
# dcc.wilid.spp.lu %>%
# datatable()
## export
# dcc.wilid.spp.lu %>%
# write_csv("./data/dcc_wilid_spp_lu.csv")
### Pre-process data
#
# * Type conversions (e.g., ID's, treatment codes to character)
# * Identify then filter NA/missing for "plantht"
# * Create fields defining treatment classes and incorporate into siteid ("siteid2", as defined in the water table dataset)
# * Create field combining willowid (not unique across all records) with site2
#
# # RETURN HERE FOR FOR MORE QA QC
#
# # convert willow id, dam, browse, exp to character instead of integer
# w_ht <- w_ht %>%
# mutate(willid = as.character(willid)) %>%
# mutate(exp = as.character(exp)) %>%
# mutate(dam = as.character(dam)) %>%
# mutate(browse = as.character(browse))
#
# # create table of just the missing/NA for inspection
# w_ht %>%
# filter(plantht == "na" | plantht == "missing" | plantht == "NA") %>%
# datatable(caption = "Records with missing/NA/na for plantht.")
#
# ## filter out "NA", "na" and "missing"; convert "plantht" to numeric; create treatment classes in incorporate into new ID fields for summarization
# # w_ht <- w_ht %>%
# # filter(plantht != "na" & plantht != "missing" & plantht != "NA") %>%
# # mutate(plantht = as.numeric(plantht)) %>%
# # mutate(treat = case_when(dam == 1 & browse == 0 ~ "DX",
# # dam == 1 & browse == 1 ~ "DC",
# # dam == 0 & browse == 0 ~ "CX",
# # dam == 0 & browse == 1 ~ "CC",
# # dam == 2 & browse == 1 ~ "OBS")) %>%
# # mutate(site2 = paste(site, treat, sep = '-')) %>%
# # mutate(willid.site2 = paste(willid, site2, sep = '-'))
# # Note that this doesn't correctly parse the exp and obs sites, as TH pointed out.
# # So instead see the following modified code:
###
w_ht <- w_ht %>%
filter(pl_ht_cm != "na" & pl_ht_cm != "missing" & pl_ht_cm != "NA") %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
rename(wilid = willid) %>%
mutate(wilid_full = paste0(wilid,"=", site))
## don't think I need this bc use of lu
w_ht <- w_ht %>%
# # rename(pl_ht_cm = plantht) %>%
mutate(plot = case_when(dam == 1 & browse == 0 ~ "dx",
dam == 1 & browse == 1 ~ "dc",
dam == 0 & browse == 0 ~ "cx",
dam == 0 & browse == 1 & exp == 1 ~ "cc",
dam == 0 & browse == 1 & exp != 1 ~ "obs",
dam == 0 & browse == 1 ~ "cc",
dam == 2 & browse == 1 ~ "obs",
TRUE ~ "oth")) %>%
mutate(site2 = paste0(site, plot, sep = '-'))
# not sure on the interpretation of the "2"
# w_ht %>% distinct(dam) # Three values: 0,1,2. Expected only 0,1.
# added the site2 field to match that used in well data
# Tip: if you are testing a scalar, use if(). Testing a vector against a single condition, dplyr::if_else. Testing a vector against multiple conditions, use case_when.
w_ht <- w_ht %>%
mutate_if(.predicate = is.character,.funs=tolower)
w_ht <- w_ht %>%
mutate(site = case_when(site == "halibuff"~"hailbuf",
TRUE ~ site))
w_ht %>%
visdat::vis_dat()
# w_ht %>%
# distinct(site) %>%
# View()
p16.raw <- read_excel("./data/raw/production/2016_Raw_Production_20180421.xlsx", col_types = "text") %>%
janitor::clean_names()
p16.raw <- p16.raw %>%
rename(wilid = wildid) %>%
rename(species = spp) %>%
rename(pl_ht_cm = height_cm) %>%
rename(live_dead = live) %>%
mutate(season = "spring") %>%
mutate(yr = as.character("2019"))
# some cleaning
p16.raw <- p16.raw %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
mutate(site2 = paste0(site, "-", "plot")) %>%
filter(wilid != "?") %>%
mutate(wilid = str_remove(.$wilid, ".0"))
p16.ht <- p16.raw %>%
select(c(site, site2, plot, species, wilid, pl_ht_cm, site)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid))
p16.ht %>%
visdat::vis_miss()
# p16.ht <- p16.ht %>%
# left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
# mutate(species = case_when(is.na(species.x)~species.y,
# is.na(species.y)~species.x,
# TRUE ~ "NA")) %>%
# select(-c(species.x, species.y))
p16.ht <-
p16.ht %>%
# filter(!is.na(pl_ht_cm)) %>%
filter(!is.na(wilid))
p16.ht %>%
visdat::vis_dat()
# u16.ht %>%
# filter(is.na(species))
p16.ht <- p16.ht %>%
select(-c(species)) %>%
mutate(yr = as.character("2016")) %>%
mutate(season = "fall") %>%
distinct()
p16.ht %>%
ggTile_yr_season_site() +
labs(caption = "p16.ht")
# 2016 utilization
u16.raw <- read_excel("./data/raw/utilization/Raw_2016_Utilization/2016_Raw_Utilization.xlsx", col_types = "text") %>%
janitor::clean_names()
u16.raw <- u16.raw %>%
rename(wilid = wildid) %>%
mutate(wilid = as.numeric(wilid)) %>%
mutate(wilid = as.character(wilid)) %>%
rename(species = spp) %>%
rename(pl_ht_cm = ht_cm) %>%
rename(live_dead = live) %>%
mutate(season = "spring") %>%
mutate(yr = as.character('yr'))
# some cleaning
u16.raw <- u16.raw %>%
mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
mutate(site = if_else(site == "xtal","crystal",site)) %>%
mutate(site2 = paste0(site, "-", "plot"))
u16.raw %>%
distinct(site, plot) %>%
datatable()
u16.ht <- u16.raw %>%
select(c(site, site2, plot, species, wilid, pl_ht_cm, site, live_dead)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid))
u16.ht %>%
visdat::vis_miss()
# './data/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv'
u16.ht <- u16.ht %>%
left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
mutate(species = case_when(is.na(species.x)~species.y,
is.na(species.y)~species.x,
TRUE ~ "NA")) %>%
select(-c(species.x, species.y))
u16.ht %>%
visdat::vis_dat()
# u16.ht <- u16.ht %>%
# filter(!is.na(pl_ht_cm))
# u16.ht %>%
# filter(is.na(species))
ht_lm_entry <- read_csv("./data/raw/ht/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv")
ht_lm_entry <- ht_lm_entry %>%
janitor::clean_names() %>%
select(-id) %>%
rename(wilid = willid) %>%
mutate(plot = case_when(dam == "1" & browse == "1" ~ "dc",
dam == "1" & browse == "0" ~ "dx",
dam == "0" & browse == "1" ~ "cc",
dam == "0" & browse == "0" ~ "cx",
TRUE ~ "obs"))
ht_lm_entry <- ht_lm_entry %>%
rename(yr = year) %>%
rename(pl_ht_cm = plantht) %>%
mutate_if(.predicate = is.numeric,.funs = as.character) %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
ht_lm_entry %>%
mutate(site2 = paste0(site,"-",plot)) %>%
group_by(yr, season, site2) %>%
tally() %>%
filter(yr == "2017") %>%
filter(season != "fall") %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n)) +
labs(caption = "Only 2017 data")
## only 2017 spring data
ht_lm_entry_17 <- ht_lm_entry %>%
filter(yr == '2017')
p17.raw <- read_excel("./data/raw/production/2017_Raw_Production_20180423.xlsx", col_types = "text") %>%
janitor::clean_names()
p17.raw %>% glimpse()
## Observations: 1,122
## Variables: 79
## $ date <chr> "8152017", "8152017", "8152017", "8152017...
## $ technician <chr> "tg sb ak hh", "tg sb ak hh", "tg sb ak h...
## $ site <chr> "wb", "wb", "wb", "wb", "wb", "wb", "wb",...
## $ plot <chr> "cx", "cx", "cx", "cx", "cx", "cx", "cx",...
## $ spp <chr> "beb", "beb", "beb", "beb", "beb", "beb",...
## $ wildid <chr> "257", "257", "257", "260", "260", "260",...
## $ sapsucker_wells_y_n <chr> "y", "y", "y", "y", "y", "y", "y", "y", "...
## $ cytospora_fungus_y_n <chr> "y", "y", "y", "y", "y", "y", "y", "y", "...
## $ height_cm <chr> "227", "227", "227", "402", "402", "402",...
## $ long_diameter_cm <chr> "115", "115", "115", "360", "360", "360",...
## $ perp_diameter_cm <chr> "65", "65", "65", "290", "290", "290", "1...
## $ number_new_dead_stems <chr> "0", "0", "0", "0", "0", "0", "0", "0", "...
## $ number_of_new_stems <chr> "3", "3", "3", "0", "0", "0", "0", "0", "...
## $ new_stem_length <chr> "43", "43", "43", NA, NA, NA, NA, NA, NA,...
## $ stem_id <chr> "11704", "25813", "5609", "262", "260", "...
## $ live <chr> "live", "live", "live", "live", "live", "...
## $ meas_extent <chr> NA, "no todos", NA, "band", "band", "band...
## $ notes <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ check_meas <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "chec...
## $ stem_basal_mm <chr> "206", "18.5", "13.9", "37.20000000000000...
## $ biggest_shoot_diameter_mm <chr> "na", "na", "na", "na", "na", "na", "na",...
## $ long_shoot_length_cm <chr> "12", "15", NA, "15", "21", "82", "10", "...
## $ br_scheme <chr> "0", "0", "0", "0", "0", "0", "0", "0", "...
## $ ub_scheme <chr> "25", "20", "1", "70", "30", "100", "40",...
## $ sht_len_1 <chr> "2", "1", "1", "3", "1", "3", "2", "2", "...
## $ sht_len_2 <chr> "1", "3", "1", "3", "2", "4", "2", "2", "...
## $ sht_len_3 <chr> "1", "1", "2", "2", "5", "7", "2", "2", "...
## $ sht_len_4 <chr> "1", "1", "1", "4", "3", "4", "4", "2", "...
## $ sht_len_5 <chr> "1", "1", "1", "1", "1", "2", "3", "2", "...
## $ sht_len_6 <chr> "2", "1", "1", "5", "3", "2", "3", "3", "...
## $ sht_len_7 <chr> "1", "1", "1", "1", "1", "4", "2", "4", "...
## $ sht_len_8 <chr> "2", "1", "1", "2", "2", "4", "1", "2", "...
## $ sht_len_9 <chr> "1", "2", "1", "2", "2", NA, "4", "6", "1...
## $ sht_len_10 <chr> "3", "1", "1", "6", "5", NA, "3", "6", "1...
## $ sht_len_11 <chr> "3", "1", "1", "3", "11", NA, NA, "1", "2...
## $ sht_len_12 <chr> "7", "1", "1", "3", "2", NA, NA, "3", "3"...
## $ sht_len_13 <chr> "6", "1", "2", "1", "11", NA, NA, NA, "2"...
## $ sht_len_14 <chr> NA, "4", "1", "2", "3", NA, NA, NA, "2", ...
## $ sht_len_15 <chr> NA, "14", "1", NA, "1", NA, NA, NA, "3", ...
## $ sht_len_16 <chr> NA, "6", "1", NA, "2", NA, NA, NA, NA, NA...
## $ sht_len_17 <chr> NA, "2", "1", NA, "3", NA, NA, NA, NA, NA...
## $ sht_len_18 <chr> NA, "2", "1", NA, "4", NA, NA, NA, NA, NA...
## $ sht_len_19 <chr> NA, NA, "5", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_20 <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_21 <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_22 <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_23 <chr> NA, NA, "2", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_24 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_25 <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_26 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_27 <chr> NA, NA, "2", NA, "7", NA, NA, NA, NA, NA,...
## $ sht_len_28 <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_29 <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_30 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_31 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_32 <chr> NA, NA, "1", NA, "2", NA, NA, NA, NA, NA,...
## $ sht_len_33 <chr> NA, NA, "1", NA, "3", NA, NA, NA, NA, NA,...
## $ sht_len_34 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_35 <chr> NA, NA, "1", NA, "1", NA, NA, NA, NA, NA,...
## $ sht_len_36 <chr> NA, NA, "1", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_37 <chr> NA, NA, "1", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_38 <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_39 <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_40 <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_41 <chr> NA, NA, "2", NA, NA, NA, NA, NA, NA, NA, ...
## $ sht_len_42 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_43 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_44 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_45 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_46 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_47 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_48 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_49 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_50 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_51 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_52 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ sht_len_53 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ unbrowse_remainder <chr> "13", "8", NA, "38", "18", "82", "7", "7"...
## $ browse_remainder <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
p17.raw %>% distinct(plot)
## # A tibble: 9 x 1
## plot
## <chr>
## 1 cx
## 2 dx
## 3 cc
## 4 dc
## 5 1
## 6 <NA>
## 7 4
## 8 5
## 9 6
## bad plot data entry
p17.raw <- p17.raw %>%
# distinct(plot)
mutate(plot = case_when(is.na(plot)~"obs",
plot == "1" ~ "obs",
plot == "4" ~ "obs",
plot == "5" ~ "obs",
plot == "6" ~ "obs",
TRUE ~ plot))
p17.raw %>%
tabyl(site, plot) %>%
gt() %>%
tab_header(title = "Count of observations in 2017 raw production data")
| Count of observations in 2017 raw production data | |||||
|---|---|---|---|---|---|
| site | cc | cx | dc | dx | obs |
| crescent | 0 | 0 | 0 | 0 | 16 |
| eb1 | 36 | 28 | 57 | 60 | 0 |
| eb2 | 18 | 37 | 0 | 45 | 0 |
| elk | 49 | 78 | 38 | 70 | 9 |
| elk1 | 0 | 0 | 0 | 0 | 18 |
| elk2 | 0 | 0 | 0 | 0 | 16 |
| elk3 | 0 | 0 | 0 | 0 | 9 |
| elk4 | 0 | 0 | 0 | 0 | 15 |
| elk5 | 0 | 0 | 0 | 0 | 16 |
| ghole | 0 | 0 | 0 | 0 | 18 |
| glen | 0 | 0 | 0 | 0 | 16 |
| halibuff | 0 | 0 | 0 | 0 | 6 |
| lb | 0 | 0 | 0 | 0 | 53 |
| lb1 | 0 | 0 | 0 | 0 | 13 |
| lb2 | 0 | 0 | 0 | 0 | 22 |
| lb3 | 0 | 0 | 0 | 0 | 19 |
| lostc | 0 | 0 | 0 | 0 | 17 |
| lostl | 0 | 0 | 0 | 0 | 15 |
| oxbow | 0 | 0 | 0 | 0 | 15 |
| rose | 0 | 0 | 0 | 0 | 12 |
| slide | 0 | 0 | 0 | 0 | 8 |
| wb | 69 | 71 | 71 | 67 | 14 |
| wn | 0 | 1 | 0 | 0 | 0 |
p17.raw <- p17.raw %>%
rename(wilid = wildid) %>%
rename(species = spp) %>%
rename(pl_ht_cm = height_cm) %>%
rename(live_dead = live) %>%
mutate(season = "fall") %>%
mutate(yr = "2017") %>%
mutate(yr = as.character(yr))
# some cleaning
p17.raw <- p17.raw %>%
# mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
mutate(site2 = paste0(site, "-", plot))
p17.ht <- p17.raw %>%
select(c(yr, season, site, site2, plot, species, wilid, pl_ht_cm, live_dead)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid))
# './data/2016_2017_Data_Entry_Format_WillowHt_2001_2015.csv'
# p17.ht <- p17.ht %>%
# left_join(., lu.wilid.spp.v2, by = "wilid_full") %>%
# mutate(species = case_when(is.na(species.x)~species.y,
# is.na(species.y)~species.x,
# TRUE ~ "NA")) %>%
# select(-c(species.x, species.y))
p17.ht <- p17.ht %>%
filter(!is.na(pl_ht_cm)) %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
p17.ht %>%
visdat::vis_dat()
p17.ht %>%
tabyl(site2,plot) %>%
gt() %>%
tab_header(title = "2017 Fall Count by Site2 and Plot")
| 2017 Fall Count by Site2 and Plot | |||||
|---|---|---|---|---|---|
| site2 | cc | cx | dc | dx | obs |
| crescent-obs | 0 | 0 | 0 | 0 | 16 |
| eb1-cc | 34 | 0 | 0 | 0 | 0 |
| eb1-cx | 0 | 28 | 0 | 0 | 0 |
| eb1-dc | 0 | 0 | 53 | 0 | 0 |
| eb1-dx | 0 | 0 | 0 | 59 | 0 |
| eb2-cc | 18 | 0 | 0 | 0 | 0 |
| eb2-cx | 0 | 37 | 0 | 0 | 0 |
| eb2-dx | 0 | 0 | 0 | 45 | 0 |
| elk-cc | 45 | 0 | 0 | 0 | 0 |
| elk-cx | 0 | 77 | 0 | 0 | 0 |
| elk-dc | 0 | 0 | 37 | 0 | 0 |
| elk-dx | 0 | 0 | 0 | 70 | 0 |
| elk-obs | 0 | 0 | 0 | 0 | 9 |
| elk1-obs | 0 | 0 | 0 | 0 | 4 |
| elk2-obs | 0 | 0 | 0 | 0 | 13 |
| elk3-obs | 0 | 0 | 0 | 0 | 5 |
| elk4-obs | 0 | 0 | 0 | 0 | 15 |
| elk5-obs | 0 | 0 | 0 | 0 | 16 |
| ghole-obs | 0 | 0 | 0 | 0 | 18 |
| glen-obs | 0 | 0 | 0 | 0 | 16 |
| halibuff-obs | 0 | 0 | 0 | 0 | 6 |
| lb-obs | 0 | 0 | 0 | 0 | 53 |
| lb1-obs | 0 | 0 | 0 | 0 | 13 |
| lb2-obs | 0 | 0 | 0 | 0 | 22 |
| lb3-obs | 0 | 0 | 0 | 0 | 16 |
| lostc-obs | 0 | 0 | 0 | 0 | 17 |
| lostl-obs | 0 | 0 | 0 | 0 | 15 |
| oxbow-obs | 0 | 0 | 0 | 0 | 15 |
| rose-obs | 0 | 0 | 0 | 0 | 12 |
| slide-obs | 0 | 0 | 0 | 0 | 8 |
| wb-cc | 69 | 0 | 0 | 0 | 0 |
| wb-cx | 0 | 71 | 0 | 0 | 0 |
| wb-dc | 0 | 0 | 71 | 0 | 0 |
| wb-dx | 0 | 0 | 0 | 67 | 0 |
| wb-obs | 0 | 0 | 0 | 0 | 14 |
| wn-cx | 0 | 1 | 0 | 0 | 0 |
p17.ht <- p17.ht %>%
mutate(site = case_when(site == "wn" ~ "wb",
TRUE ~ site))
p17.ht %>%
distinct(site2, plot, wilid) %>%
datatable()
p17.ht %>% distinct(wilid_full) %>% datatable()
## issue with obs plots having
p17.ht %>%
ggTile_yr_season_site2() +
labs(caption = "p17.ht")
# 2017 utilization
u17.raw <- read_excel("./data/raw/utilization/Raw_2017_Utilization/2017_Raw_Utilization.xlsx", col_types = "text") %>%
janitor::clean_names()
u17.raw <- u17.raw %>%
rename(wilid = wildid) %>%
rename(species = spp) %>%
rename(pl_ht_cm = ht_cm) %>%
rename(live_dead = live) %>%
mutate(season = "spring") %>%
mutate(yr = as.character("2017"))
# some cleaning
u17.raw <- u17.raw %>%
mutate(site = if_else(site == "elk5 ?", "elk5",site)) %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>%
mutate(site2 = paste0(site, "-", plot))
#
u17.raw <- u17.raw %>%
filter(!is.na(pl_ht_cm)) %>%
select(c(yr, site, site2, plot, wilid, season, yr, pl_ht_cm, live_dead)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid)) %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
filter(pl_ht_cm < 800)
# u17.raw %>%
# distinct(wilid) %>%
# View()
## filter out the
u17.ht <- u17.raw %>%
filter(live_dead != "DEAD" &
live_dead != "dead" &
live_dead != "missing")
### summarizing by wilid
u17.ht <- u17.ht %>%
select(-live_dead) %>%
group_by(yr, site, site2, season, plot, wilid, wilid_full) %>%
summarise(pl_ht_cm = max(pl_ht_cm)) %>%
ungroup()
u17.ht %>%
visdat::vis_miss() +
labs(caption = "missing data, raw 2017 data")
u17.ht %>%
visdat::vis_dat() +
labs(caption = "u17.ht")
u17.ht %>%
glimpse()
## Observations: 378
## Variables: 8
## $ yr <chr> "2017", "2017", "2017", "2017", "2017", "2017", "2017", ...
## $ site <chr> "crescent", "crescent", "crescent", "crescent", "crescen...
## $ site2 <chr> "crescent-obs", "crescent-obs", "crescent-obs", "crescen...
## $ season <chr> "spring", "spring", "spring", "spring", "spring", "sprin...
## $ plot <chr> "obs", "obs", "obs", "obs", "obs", "cc", "cc", "cc", "cc...
## $ wilid <chr> "86", "89", "92", "95", "98", "493", "499", "602", "605"...
## $ wilid_full <chr> "crescent-obs-86", "crescent-obs-89", "crescent-obs-92",...
## $ pl_ht_cm <dbl> 56, 56, 51, 40, 49, 84, 97, 85, 123, 111, 104, 137, 103,...
u17.ht <- u17.ht %>%
mutate(plot = case_when(site2 == "elk4-35" ~"obs",
TRUE ~ plot)) %>%
mutate(plot = case_when(site2 == "elk3-35" ~"obs",
TRUE ~ plot)) %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(wilid_full = paste0(site2,"-",wilid))
u17.ht %>%
ggTile_yr_season_site2() +
theme_grey()
gg.u17 <- u17.ht %>%
ggplot(aes(x = site2, y = pl_ht_cm)) +
geom_point(aes(color = plot)) +
coord_flip()
plotly::ggplotly(gg.u17)
## issue with obs plots having
u17.ht %>%
ggTile_yr_season_site2() +
labs(caption = "u17.ht")
# updated as of 20191203
files2018 <- fs::dir_ls("./data/raw/production/Raw_2018_Production", recurse = FALSE, glob = "*.xlsx")
## csv cache
# path <- readxl_example("datasets.xlsx")
# path %>%
# excel_sheets() %>%
# set_names() %>%
# map(read_then_csv, path = path)
## not working as intended
# read_then_csv2 <- function(fpath){
# fpath %>%
# excel_sheets() %>%
# set_names() %>%
# map(read_then_csv, path = fpath)
# }
#
# files2018 %>%
# map(.x = .,read_then_csv2)
# library(readxl)
## orignal way. works but only reads in one sheet!!
# #for each excel file name, read excel sheet and append to df
# p18 <- files2018 %>%
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column
#### running the above function on 2018 data
# p18 <- files2018 %>%
# map_df(~ read_sheets(.))
p18 <- files2018 %>%
map_df(~ read_sheets_prod(.))
p18 <- p18 %>%
janitor::clean_names() %>%
mutate(yr = 2018) %>%
mutate(season = "fall")
# p19 %>%
# names() %>%
# tibble::enframe(name = NULL) %>%
# datatable()
## Select subset and type cast
p18sel <- p18 %>%
select(file_name, sheet_name, yr, season, site, plot, spp, wilid, pl_ht_cm, stid, live_status)
# obs for na
p18sel <- p18sel %>%
mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))
p18sel %>%
glimpse()
## Observations: 4,422
## Variables: 11
## $ file_name <chr> "./data/raw/production/Raw_2018_Production/crescent_obs...
## $ sheet_name <chr> "Sheet1", "Sheet1", "Sheet1", "Sheet1", "Sheet1", "Shee...
## $ yr <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2...
## $ season <chr> "fall", "fall", "fall", "fall", "fall", "fall", "fall",...
## $ site <chr> "cresc", "cresc", "cresc", "cresc", "cresc", "cresc", "...
## $ plot <chr> "obs", "obs", "obs", "obs", "obs", "obs", "obs", "obs",...
## $ spp <chr> "geyer", "geyer", "geyer", "geyer", "geyer", "geyer", "...
## $ wilid <chr> "86", "86", "86", "86", "86", "86", "89", "89", "89", "...
## $ pl_ht_cm <chr> "108", "108", "108", "108", "108", "108", "68", "68", "...
## $ stid <chr> "86", "86", "87", "87", "88", "88", "89", "89", "90", "...
## $ live_status <chr> "dead", "dead", "live", "live", "live", "live", "missin...
p18sel %>%
visdat::vis_dat()
# p18sel %>%
# distinct() %>%
# select(-date) %>%
# filter(is.na(pl_ht_cm)) %>%
# datatable(caption = "Missing plant ht - 2018 production")
# eliminate duplicate rows
p18sel <- p18sel %>%
distinct() %>%
# select(-date) %>%
filter(!is.na(pl_ht_cm))
visdat::vis_dat(p18sel)
# updated as of 20191203
files2019 <- fs::dir_ls("./data/raw/production/Raw_2019_Production", recurse = FALSE, glob = "*.xlsx")
# ## original way
# #for each excel file name, read excel sheet and append to df
# p19 <- files2019 %>%
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
#
#### running the above function on 2018 data
# p19 <- files2019 %>%
# map_df(~ read_sheets(.))
p19 <- files2019 %>%
map_df(~ read_sheets_prod(.))
p19 <- p19 %>%
janitor::clean_names() %>%
mutate(yr = 2019) %>%
mutate(season = "fall")
p19 %>%
names() %>%
tibble::enframe(name = NULL) %>%
datatable()
## Select subset and type cast
p19sel <- p19 %>%
select(file_name, sheet_name, yr, season, site, plot, spp, wilid, pl_ht_cm, stid, live_status)
p19sel <- p19sel %>%
mutate(plot = if_else(condition = (is.na(plot)), true = "obs", plot))
# eliminate duplicate rows
p19sel <- p19sel %>%
distinct()
## combine 18 and 19
p18p19sel <- bind_rows(p18sel, p19sel)
visdat::vis_miss(p18p19sel)
# make all lower
p18p19sel <- p18p19sel %>%
mutate_if(.predicate = is.character,.funs=tolower)
p18p19sel <- p18p19sel %>%
filter(!is.na(pl_ht_cm))
# remove records that have na for wilid
p18p19sel <- p18p19sel %>%
filter(!is.na(wilid)) %>%
filter(!is.na(site)) %>%
filter(!is.na(pl_ht_cm))
## summarize the willow height by wilid and year
p1819.wilidht <- p18p19sel %>%
group_by(yr, site, spp, wilid, live_status) %>%
summarise(pl_ht_cm = max(pl_ht_cm)) %>%
ungroup()
# eliminate the na for plant height
# p1819.wilidht %>%
# filter(is.na(pl_ht_cm)) %>%
# gt() %>%
# tab_header(title = "Records with NA for plant ht ")
# eliminate the na for plant height
p1819.wilidht <- p1819.wilidht %>%
filter(!is.na(pl_ht_cm))
ufiles2018 <- fs::dir_ls("./data/raw/utilization/Raw_2018_Utilization", recurse = FALSE, glob = "*.xlsx")
## single sheet
# u18 <- ufiles2018 %>%
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text"))
## add code to read multitabs...
# u18 <- ufiles2018 %>%
# map_df(~ read_sheets(.))
u18 <- ufiles2018 %>%
map_df(~ read_sheets_util(.))
# u18 %>% distinct(file_name)
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column
u18 <- u18 %>%
janitor::clean_names() %>%
mutate(yr = 2018) %>%
mutate(season = "spring")
## whack a mole.
# change lb41 to lb4
u18 <- u18 %>%
mutate(site = case_when(site == "lb41" ~ "lb4",
TRUE ~ site))
# u18 %>%
# glimpse()
u18 %>%
names() %>%
tibble::enframe(name = NULL) %>%
gt() %>%
tab_header(title = "variable names")
| variable names |
|---|
| value |
| file_name |
| sheet_name |
| date |
| tech_initial |
| site |
| plot |
| spp |
| wilid |
| stid |
| yr_added |
| pl_ht_cm |
| live_status |
| meas_extent |
| tag_type |
| notes_util_2018 |
| notes_prod_2018 |
| rem |
| sl |
| scheme |
| len_dia |
| x1a |
| x1b |
| x2a |
| x2b |
| x3a |
| x3b |
| x4a |
| x4b |
| x5a |
| x5b |
| x6a |
| x6b |
| x7a |
| x7b |
| x8a |
| x8b |
| x9a |
| x9b |
| x10a |
| x10b |
| x11a |
| x11b |
| x12a |
| x12b |
| x13a |
| x13b |
| x14a |
| x14b |
| x15a |
| x15b |
| x16a |
| x16b |
| x17a |
| x17b |
| x18a |
| x18b |
| x19a |
| x19b |
| x20a |
| x20b |
| x21a |
| x21b |
| x22a |
| x22b |
| x23a |
| x23b |
| x24a |
| x24b |
| x25a |
| x25b |
| x26a |
| x26b |
| x27a |
| x27b |
| x28a |
| x28b |
| x29a |
| x29b |
| x30a |
| x30b |
| x31a |
| x31b |
| x32a |
| x32b |
| x33a |
| x33b |
| x34a |
| x34b |
| x35a |
| x35b |
| x36a |
| x36b |
| x37a |
| x37b |
| x38a |
| x38b |
| x39a |
| x39b |
| x97 |
| x98 |
| x99 |
| x100 |
| x101 |
| x102 |
| x103 |
| x104 |
| x105 |
| x106 |
| x107 |
| x108 |
| x109 |
| x110 |
| x111 |
| x112 |
| x113 |
| x114 |
| x115 |
| x116 |
| x117 |
| x118 |
| x119 |
| x120 |
| x121 |
| tech_initials |
| willid |
| height_cm |
| notes_prod_2017 |
| notes_util_2019 |
| notes_prod_2019 |
| x2017_production_notes |
| x2018_utilization_notes |
| x78 |
| x79 |
| x80 |
| x81 |
| x82 |
| x83 |
| x84 |
| x85 |
| x86 |
| x87 |
| x88 |
| x89 |
| x90 |
| x91 |
| x92 |
| x93 |
| x94 |
| x95 |
| x96 |
| date_year_month_day |
| x40a |
| x40b |
| x41a |
| x41b |
| x42a |
| x42b |
| x43a |
| x43b |
| x44a |
| x44b |
| x45a |
| x45b |
| x46a |
| x46b |
| x47a |
| x47b |
| x48a |
| x48b |
| x49a |
| x49b |
| ht_cm |
| yr |
| season |
u18 %>%
distinct(site) %>% gt()
| site |
|---|
| cresc |
| NA |
| eb1 |
| eb2 |
| elk |
| ghole |
| glen |
| hailbuff |
| lava |
| lb1 |
| lb2 |
| lb3 |
| lb4 |
| lb5 |
| lb6 |
| lost c |
| lost l |
| oxbow |
| rose |
| slide |
| wb |
| wb4 |
| wb2 |
| wb1 |
| yancy |
## WB?
## BIG fix: pl_ht and pl_ht_cm
## willid and wilid
u18 %>%
select(file_name,contains("cm")) %>%
distinct()
## # A tibble: 231 x 4
## file_name pl_ht_cm height_cm ht_cm
## <chr> <chr> <chr> <chr>
## 1 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 57 <NA> <NA>
## 2 ./data/raw/utilization/Raw_2018_Utilization/crescen~ NA <NA> <NA>
## 3 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 50 <NA> <NA>
## 4 ./data/raw/utilization/Raw_2018_Utilization/crescen~ 52 <NA> <NA>
## 5 ./data/raw/utilization/Raw_2018_Utilization/crescen~ <NA> <NA> <NA>
## 6 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA> 107 <NA>
## 7 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA> 108 <NA>
## 8 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA> 136 <NA>
## 9 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA> <NA> <NA>
## 10 ./data/raw/utilization/Raw_2018_Utilization/eb1_exp~ <NA> 129 <NA>
## # ... with 221 more rows
u18 <- u18 %>%
mutate(pl_ht_cm = case_when(is.na(height_cm)~ ht_cm,
is.na(pl_ht_cm)~height_cm,
TRUE ~ pl_ht_cm)) %>%
mutate(wilid = case_when(is.na(wilid) ~ willid,
is.na(willid)~ wilid,
TRUE ~ wilid))%>%
filter(!is.na(pl_ht_cm)) %>%
select(-c(height_cm,willid))
u18 %>%
select(file_name,contains("cm")) %>%
distinct() %>%
datatable()
### select subset of columns: for height
u18.ht <- u18 %>%
# glimpse()
select(c('file_name','yr','season', 'spp', 'plot','site','wilid','stid', 'live_status', 'pl_ht_cm'))
# u18.ht %>%
# filter(site == "wb")
u18.ht %>%
filter(!is.na(pl_ht_cm)) %>%
# filter(!is.na(site)) %>%
distinct(site) %>%
gt::gt() %>%
tab_header(title = "Distinct site on u18 import")
| Distinct site on u18 import |
|---|
| site |
| eb1 |
| eb2 |
| elk |
| ghole |
| glen |
| hailbuff |
| lava |
| lb1 |
| lb2 |
| lb3 |
| lb4 |
| lb5 |
| lb6 |
| lost c |
| lost l |
| oxbow |
| rose |
| slide |
| wb |
| wb4 |
| wb2 |
| wb1 |
# u18.ht %>%
# distinct(pl_ht_cm) %>%
# View() # some rows have na entred as text
# rename, reclass
u18.ht <- u18.ht %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))
# clean
u18.ht <- u18.ht %>%
distinct()
# more cleaning
u18.ht <- u18.ht %>%
filter(!is.na(pl_ht_cm)) %>%
filter(wilid != "?") %>%
mutate(wilid = str_remove(.$wilid, ".0")) # remove the ".0" part of strings
##########
## calculate the per wilid height from population of stid
##########
u18.ht <- u18.ht %>%
filter(!is.na(pl_ht_cm)) %>%
group_by(yr, wilid, site, spp, plot, season, live_status) %>%
summarise(pl_ht_cm = max(pl_ht_cm)) %>%
ungroup()
u18.ht <- u18.ht %>%
filter(!is.na(wilid)) %>%
filter(!wilid == "") %>%
mutate(wilid = str_remove(.$wilid, ".0"))
u18.ht %>%
distinct(site)
## # A tibble: 21 x 1
## site
## <chr>
## 1 lb3
## 2 lost l
## 3 slide
## 4 wb
## 5 wb2
## 6 hailbuff
## 7 lost c
## 8 lb1
## 9 eb1
## 10 lava
## # ... with 11 more rows
## where is wb???
u18.ht %>%
visdat::vis_miss()
u18.ht %>%
# visdat::vis_guess() +
# visdat::vis_miss() +
visdat::vis_dat() +
theme_classic() +
labs(title = "2018 Spring Height", subtitle = "Missing map for qa/qc") +
scale_fill_viridis(discrete = TRUE, option = "D") +
coord_flip()
u18.plot1 <- u18.ht %>%
mutate(yr = as.character(yr)) %>%
ggplot(aes(site, pl_ht_cm)) +
# geom_boxplot(aes(color = site)) +
geom_jitter(alpha = .5)
plotly::ggplotly(u18.plot1)
### read_sheets
# read_sheets <- function(file){
# xlsx_file <- file
# xlsx_file %>%
# excel_sheets() %>%
# set_names() %>%
# map_df(read_excel, path = xlsx_file, .id = 'sheet_name', trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46") %>%
# mutate(file_name = file) %>%
# select(file_name, sheet_name, everything())
# }
####
ufiles2019 <- fs::dir_ls("./data/raw/utilization/Raw_2019_Utilization", recurse = FALSE, glob = "*.xlsx")
# u19 <- ufiles2019 %>%
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text"))
## multitab
# u19 <- ufiles2019 %>%
# map_df(~ read_sheets(.))
u19 <- ufiles2019 %>%
map_df(~ read_sheets_util(.))
# map_df( ~ read_excel(path = .x,trim_ws = TRUE, skip = 1, col_types = "text", range = "A2:DV46"))
# # note use of specific range and skipping of column
u19 <- u19 %>%
janitor::clean_names() %>%
mutate(yr = 2019) %>%
mutate(season = "spring")
u19 %>%
names() %>%
tibble::enframe(name = NULL) %>%
datatable()
u19 <- u19 %>%
# mutate(pl_ht_cm = case_when(is.na(height_cm)~ ht_cm,
# is.na(pl_ht_cm)~height_cm,
# TRUE ~ pl_ht_cm)) %>%
mutate(wilid = case_when(is.na(wilid) ~ willid,
is.na(willid)~ wilid,
TRUE ~ wilid))%>%
filter(!is.na(pl_ht_cm))
### select subset of columns: for UTILIZATION
# u19.sel <- u19 %>%
# # glimpse()
# select(c('date','yr','season', 'site','wilid','stid', 'live_status', 'scheme','height_cm','spp','len_dia'), contains("sht"),contains("x"))
# u19 %>% visdat::vis_miss()
### select subset of columns: for height
u19.ht <- u19 %>%
# glimpse()
select(c(file_name, sheet_name, yr, site, season, spp, plot, wilid, stid, live_status, pl_ht_cm))
# 'yr','season', 'spp', 'plot','site','wilid','stid', 'live_status', 'pl_ht_cm'))
# rename, reclass
u19.ht <- u19.ht %>%
# mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
mutate(plot = if_else(condition = is.na(plot), true = "obs", plot))
# clean
u19.ht <- u19.ht %>%
# filter(!(is.na(willid) & is.na(wilid))) %>%
distinct()
# more cleaning
u19.ht <- u19.ht %>%
filter(!is.na(pl_ht_cm)) %>%
filter(wilid != "NA")
visdat::vis_miss(u19.ht)
##########
## calculate the per wilid height from population of stid
##########
u19.ht <- u19.ht %>%
filter(!is.na(pl_ht_cm)) %>%
group_by(file_name, yr, wilid, site, spp, plot, season, live_status) %>%
summarise(pl_ht_cm = max(pl_ht_cm)) %>%
ungroup()
u19.ht <- u19.ht %>%
filter(!is.na(wilid))
u19.ht <- u19.ht %>%
mutate_if(.predicate = is.character,.funs=tolower)
visdat::vis_miss(u19.ht)
u19.ht %>%
visdat::vis_miss(cluster = TRUE) +
# visdat::vis_guess() +
theme_classic() +
labs(title = "2019 Spring Height", subtitle = "Missing map for qa/qc") +
scale_fill_viridis(discrete = TRUE) +
coord_flip()
# plotly::ggplotly()
#
u19.ht <- u19.ht %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
u18u19.ht <- bind_rows(u18.ht,u19.ht)
u18u19.ht %>%
mutate(site2 = paste0(site,"-",plot)) %>%
ggTile_yr_season_site2(.) +
labs(caption = "u18u19.ht")
u18u19.ht %>%
# filter(!is.na(site)) %>%
# select(wilid) %>%
# datapasta::vector_paste() %>%
filter((wilid %in% c("415", "418", "449", "449", "452")) & is.na(site)) %>%
# filter(wilid %in% c("415", "418", "449", "449", "452")) %>%
distinct(wilid, site, yr) %>%
arrange(wilid)
## # A tibble: 4 x 3
## wilid site yr
## <chr> <chr> <dbl>
## 1 415 <NA> 2019
## 2 418 <NA> 2019
## 3 449 <NA> 2019
## 4 452 <NA> 2019
# u18u19.ht <- u18u19.ht %>%
# filter(!is.na(site))
# missing data check: spring 2018 and 2019
# u18u19.ht %>%
# mutate(yr = as.character(yr)) %>%
# group_by(yr, season, site) %>%
# summarise(n= n()) %>%
# ggplot(aes(yr, site)) +
# geom_tile(aes(fill = n), color = 'black') +
# facet_wrap(~season)
#### Creating an export file to update out of R
# read in the most current version below this section
# create lu for exp trt
# wilid.lu <- w_ht %>%
# distinct(site, wilid, treat, exp, dam, browse)
# plot.lu <- w_ht %>%
# distinct(treat, exp, dam, browse) %>%
# mutate(plot = tolower(treat))
# plot.lu %>%
# write_csv("./data/plot_lookup_master.csv")
# wilid.lu.1 <- w_ht %>%
# distinct(site, wilid, treat, exp, dam, browse)
# ## join with info
# wilid.lu.2update <- left_join(p1819.wilidht, wilid.lu) %>%
# distinct(site, wilid, treat, exp, dam, browse) %>%
# filter(is.na(treat)) %>%
# bind_rows(.,wilid.lu)
#
## wilid.lu.2update %>%
## write_csv("./data/wilid_lookup_master.csv") # have manually updated this output, read in below
#############
####### read in the lu
# wilid.lu <- read_csv("./data/wilid_lookup_master.csv",col_types = "ccccccc")
# alt updated lu
wilid.lu <- read_csv("./data/wilid_lookup_master_v2.csv",col_types = "cccc")
wilid.lu <- wilid.lu %>%
distinct()
# change willid to wilid
# make everything lowercase
# w_ht <- w_ht %>%
# rename(wilid = willid) %>%
# mutate_if(.predicate = is.character,.funs=tolower)
## join the fall data with site info lu
p1819.wilidht <- left_join(p1819.wilidht, wilid.lu)
# fix na for treat
p1819.wilidht <- p1819.wilidht %>%
# distinct(site, treat) %>% View() # Ichecked and this does correctly convert site = na to obs
mutate(treat = if_else(condition = is.na(treat),true = "obs", treat)) %>%
mutate(site2 = paste0(site,"-",treat)) %>%
rename(plot = treat)
# plot lu
# p1819.wilidht <- left_join(p1819.wilidht, plot.lu, by = "plot")
p1819.wilidht <- p1819.wilidht %>%
mutate(season = "fall")
# u18u19.ht <- left_join(u18u19.ht, wilid.lu, by = "wilid")
## adress missing sites
# u18u19.ht <- u18u19.ht %>%
# mutate(site = case_when(is.na(site.x) ~ site.y,
# # is.na(site.y) ~ site.x,
# TRUE ~ site.x)) %>%
# select(-c(site.x, site.y)) %>%
# filter(!is.na(site))
u18u19.ht <- u18u19.ht %>%
mutate(plot = if_else(condition = plot == "1",true = "obs","obs")) ## not sure why, but error with plot...
# plot lu
# u18u19.ht <- u18u19.ht %>%
# # select(-treat) %>%
# left_join(., plot.lu, by = "plot") %>%
# filter(!is.na(plot))
# u18u19.ht %>% visdat::vis_miss()
# add site info to 18 19 spring measurements
# u18u19.ht <- left_join(u18u19.ht, plot.lu, by = "plot")
# View(u18u19.ht)
# add plot info
# u18u19.ht <- u18u19.ht %>%
# left_join(., plot.lu, by = "plot")
### Combine spring and fall for 2018 and 2019
##
p1819.wilidht <- p1819.wilidht %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
u18u19.ht <- u18u19.ht %>%
filter(wilid != "")
p1819.wilidht <- p1819.wilidht %>%
filter(live_status != "dead") %>%
filter(live_status != "missing")
ht.all18.19 <- bind_rows(p1819.wilidht,u18u19.ht)
## eliminate NA fpr plant ht
ht.all18.19 <- ht.all18.19 %>%
filter(!is.na(pl_ht_cm)) %>%
rename(species = spp)
ht.all18.19 <- ht.all18.19 %>%
mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
wilid == "418" & is.na(site) ~ "ghole",
wilid == "449" & is.na(site) ~ "glen",
wilid == "452" & is.na(site) ~ "glen",
TRUE ~ site)) %>%
mutate(site2 = paste0(site,"-", plot)) %>%
mutate(wilid_full = paste0(site2,"-", wilid))
## ### 2018 2019
# ### try to add spp info to the 18 and 19
# wilid.spp.lu2 <- read_csv("./data/wilid_spp._lu_v2.csv") %>%
# select(c(species, wilid_full))
## address sites without proper site attribution
ht.all18.19 <- ht.all18.19 %>%
mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
wilid == "418" & is.na(site) ~ "ghole",
wilid == "449" & is.na(site) ~ "glen",
wilid == "452" & is.na(site) ~ "glen",
TRUE ~ site)) %>%
select(-file_name) %>%
mutate(site = case_when(site == "lb41" ~ "lb4",
site == 'lost c' ~ "lostc",
site == "lost l" ~ "lostl",
TRUE ~ site)) %>%
mutate(site2 = paste0(site,"-", plot)) %>%
mutate(wilid_full = paste0(site2,"-", wilid))
ht.all18.19 %>%
visdat::vis_miss()
# ht.all18.19 <- ht.all18.19 %>%
# rename(plot = treat)
ht.all18.19 %>%
group_by(yr, season, site2) %>%
tally() %>%
ggplot(aes(as.character(yr), site2)) +
geom_tile(aes(fill = n)) +
facet_wrap(~season)
Missing data: Yancy fall 2018
ht.all18.19 %>%
# filter(is.na(site))
mutate(yr = as.character(yr)) %>%
group_by(yr, season, site2) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n), color = 'black') +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)
ht.all18.19 %>%
mutate(yr = as.character(yr)) %>%
ggTile_yr_season_site2() +
labs(caption = "ht.all18.19")
## 16
p16.ht <- p16.ht %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(yr = as.character("2016")) %>%
mutate(season = "fall")
p16.ht %>%
tabyl(site2) %>%
gt() %>%
tab_header(title = "n distinct fall 2016 site2")
| n distinct fall 2016 site2 | ||
|---|---|---|
| site2 | n | percent |
| crescent-obs | 5 | 0.012755102 |
| eb1-cc | 13 | 0.033163265 |
| eb1-cx | 11 | 0.028061224 |
| eb1-dc | 15 | 0.038265306 |
| eb1-dx | 16 | 0.040816327 |
| eb1-obs | 5 | 0.012755102 |
| eb2-cc | 10 | 0.025510204 |
| eb2-cx | 14 | 0.035714286 |
| eb2-dc | 20 | 0.051020408 |
| eb2-dx | 14 | 0.035714286 |
| eb2-obs | 5 | 0.012755102 |
| elk-cc | 17 | 0.043367347 |
| elk-cx | 18 | 0.045918367 |
| elk-dc | 15 | 0.038265306 |
| elk-dx | 22 | 0.056122449 |
| elk1-obs | 6 | 0.015306122 |
| elk2-obs | 8 | 0.020408163 |
| elk3-obs | 5 | 0.012755102 |
| elk4-obs | 5 | 0.012755102 |
| elk5-obs | 5 | 0.012755102 |
| ghole-obs | 10 | 0.025510204 |
| halibuff-obs | 5 | 0.012755102 |
| lava-obs | 5 | 0.012755102 |
| lb1-obs | 5 | 0.012755102 |
| lb2-obs | 8 | 0.020408163 |
| lb3-obs | 5 | 0.012755102 |
| lostc-obs | 5 | 0.012755102 |
| lostl-obs | 5 | 0.012755102 |
| oxbow-obs | 5 | 0.012755102 |
| rose-obs | 5 | 0.012755102 |
| slide-obs | 6 | 0.015306122 |
| wb-cc | 21 | 0.053571429 |
| wb-cx | 20 | 0.051020408 |
| wb-dc | 21 | 0.053571429 |
| wb-dx | 21 | 0.053571429 |
| wb1-obs | 5 | 0.012755102 |
| wb2-obs | 4 | 0.010204082 |
| xtal-obs | 2 | 0.005102041 |
| yancy-obs | 5 | 0.012755102 |
## corect site id data entry error
p17.ht <- p17.ht %>%
mutate(site = if_else(site == "wn","wb",site)) %>% mutate(site2 = paste0(site,"-",plot)) %>%
mutate(yr = as.character("2017")) %>%
mutate(season = "fall")
####
p16.ht <- p16.ht %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
p17.ht <- p17.ht %>%
select(-species)
p17.ht %>%
tabyl(site2, season) %>%
arrange(site2) %>%
gt() %>%
tab_header(title = "n distinct fall 2017 site2")
| n distinct fall 2017 site2 | |
|---|---|
| site2 | fall |
| crescent-obs | 16 |
| eb1-cc | 34 |
| eb1-cx | 28 |
| eb1-dc | 53 |
| eb1-dx | 59 |
| eb2-cc | 18 |
| eb2-cx | 37 |
| eb2-dx | 45 |
| elk-cc | 45 |
| elk-cx | 77 |
| elk-dc | 37 |
| elk-dx | 70 |
| elk-obs | 9 |
| elk1-obs | 4 |
| elk2-obs | 13 |
| elk3-obs | 5 |
| elk4-obs | 15 |
| elk5-obs | 16 |
| ghole-obs | 18 |
| glen-obs | 16 |
| halibuff-obs | 6 |
| lb-obs | 53 |
| lb1-obs | 13 |
| lb2-obs | 22 |
| lb3-obs | 16 |
| lostc-obs | 17 |
| lostl-obs | 15 |
| oxbow-obs | 15 |
| rose-obs | 12 |
| slide-obs | 8 |
| wb-cc | 69 |
| wb-cx | 72 |
| wb-dc | 71 |
| wb-dx | 67 |
| wb-obs | 14 |
# p17.ht %>%
# filter(wilid == "236") %>% filter(site2 == "wn-cx") ## corected
p16p17 <- bind_rows(p16.ht,p17.ht)
p16p17 %>%
visdat::vis_dat() +
labs(caption = "p16p17")
# !!!
p16p17 <- p16p17 %>%
distinct()
p16p17 %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 9)) +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season) +
scale_fill_viridis(option = "C") +
labs(caption = "Combined 2016 and 2017 fall height")
## spring 16 17
u16.ht <- u16.ht %>%
mutate(season = "spring") %>%
mutate(yr = as.character("2016")) %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm))
u17.ht <- u17.ht %>%
mutate(season = "spring") %>%
mutate(yr = as.character("2017"))
## Lewis's other entry data
ht_lm_entry_17 <- ht_lm_entry_17 %>%
select(-c(exp,dam,browse)) %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(wilid_full = paste0(site2,"-",wilid))
###
u17.ht %>%
distinct(site2) %>%
gt() %>%
tab_header(title = "distinct site")
| distinct site |
|---|
| site2 |
| crescent-obs |
| eb1-cc |
| eb1-cx |
| eb1-dc |
| eb1-dx |
| eb1-obs |
| eb2-cc |
| eb2-cx |
| eb2-dc |
| eb2-dx |
| eb2-obs |
| elk-cc |
| elk-cx |
| elk-dc |
| elk-dx |
| elk1-obs |
| elk2-obs |
| elk3-obs |
| elk4-obs |
| elk5-obs |
| ghole-obs |
| glen-obs |
| halibuff-obs |
| lava-obs |
| lb1-obs |
| lb2-obs |
| lb3-obs |
| lb4-obs |
| lostc-obs |
| lostl-obs |
| oxbow-obs |
| rose-obs |
| slide-obs |
| wb-cc |
| wb-cx |
| wb-dc |
| wb-dx |
| wb1-obs |
| wb2-obs |
| wb4-obs |
| yancy-obs |
###
u17_lmentry <- u17.ht %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
bind_rows(., ht_lm_entry_17) %>%
# select(-species) %>%
# select(-live_dead) %>%
distinct()
# u17_lmentry %>% distinct(wilid) %>% View()
# ht.all18.19 %>% distinct(wilid) %>% View()
ht.all18.19 <- ht.all18.19 %>%
mutate(wilid = case_when(wilid == "" & site == "lb5" ~ '1070'))
ht.all18.19 %>%
filter(is.na(wilid))
## # A tibble: 1,290 x 10
## yr site species wilid live_status pl_ht_cm plot site2 season wilid_full
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 2018 cresc geyer <NA> live 108 obs cresc~ fall cresc-obs~
## 2 2018 cresc pseudo <NA> live 62 obs cresc~ fall cresc-obs~
## 3 2018 eb1 beb <NA> live 204 dc eb1-dc fall eb1-dc-24
## 4 2018 eb1 bebb <NA> live 295 dx eb1-dx fall eb1-dx-5
## 5 2018 eb1 bebb <NA> live 368 dx eb1-dx fall eb1-dx-6
## 6 2018 eb1 boothi <NA> live 164 dx eb1-dx fall eb1-dx-1
## 7 2018 eb1 boothi <NA> live 252 dc eb1-dc fall eb1-dc-11
## 8 2018 eb1 boothi <NA> live 252 dx eb1-dx fall eb1-dx-11
## 9 2018 eb1 boothi <NA> live 206 dc eb1-dc fall eb1-dc-12
## 10 2018 eb1 boothi <NA> live 206 dx eb1-dx fall eb1-dx-12
## # ... with 1,280 more rows
#### bind rows p16, p17, all ht 18 19
p.16.17.18.19 <- ht.all18.19 %>%
rename(live_dead = live_status) %>%
mutate(yr = as.character(yr)) %>%
select(-species) %>%
bind_rows(., p16p17) %>%
distinct()
p.16.17.18.19 %>%
visdat::vis_dat()
## add in the other lewis ht entry file
p.16.17.18.19 <- p.16.17.18.19 %>%
bind_rows(., u17_lmentry) %>%
select(-live_dead) %>%
distinct()
p.16.17.18.19 %>%
visdat::vis_dat()
# p.16.17.18.19 %>% distinct(wilid) %>% View()
u18u19.ht <- u18u19.ht %>%
mutate(yr = as.character(yr))
ht.all.16.17.18.19 <- p.16.17.18.19 %>%
bind_rows(., u18u19.ht) %>%
select(-c(file_name,live_status)) %>%
distinct()
ht.all.16.17.18.19 %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 9)) +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season) +
scale_fill_viridis(option = "C") +
labs(caption = "Combined 2016 and 2017 fall height")
ht.all.16.17.18.19 <- ht.all.16.17.18.19 %>%
mutate(site = case_when(site == "halibuff" ~ "hailbuff",
site == "cresc" ~ 'crescent',
site == "crescent" ~ "crescent",
site == "lost l" ~ "lostl",
site == "lost c" ~ "lostc",
site == "xtal" ~ "crystal",
TRUE ~ site))
ht.all.16.17.18.19 <- ht.all.16.17.18.19 %>%
mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
wilid == "418" & is.na(site) ~ "ghole",
wilid == "449" & is.na(site) ~ "glen",
wilid == "452" & is.na(site) ~ "glen",
TRUE ~ site)) %>%
mutate(site2 = paste0(site,"-", plot)) %>%
mutate(wilid_full = paste0(site2,"-", wilid))
## bind in the dcc from raw files
dcc.ht.fa.spr <- dcc.ht.fa.spr %>%
mutate(yr = as.character(yr))
dcc.ht.fa.spr %>%
visdat::vis_dat()
dcc.16.17.18.19 <- ht.all.16.17.18.19 %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
mutate(yr = as.character(yr)) %>%
# select(-c(browse, exp, dam)) %>%
bind_rows(., dcc.ht.fa.spr) %>%
mutate(site2 = paste0(site,"-",plot))
# dcc.16.17.18.19 <- dcc.16.17.18.19 %>%
# left_join(.,wilid.spp.lu2)
dcc.16.17.18.19 <- dcc.16.17.18.19 %>%
select(-c(spp, species)) %>%
distinct()
dcc.16.17.18.19 %>%
visdat::vis_miss()
dcc.16.17.18.19 %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 9)) +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 9)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)
dcc.16.17.18.19 <- dcc.16.17.18.19 %>%
mutate(yr = as.character(yr))
## bind in other dcc object
dcc.16.17.18.19 <- ht.all18.19 %>%
mutate(yr = as.character(yr)) %>%
bind_rows(.,dcc.16.17.18.19) %>%
select(-c(species, live_status)) %>%
distinct()
### select
dcc.16.17.18.19 <- dcc.16.17.18.19 %>%
select(c(yr, season, site, site2, wilid, wilid_full, pl_ht_cm, plot))
##
w_ht <- w_ht %>%
# rename(plot = treat) %>%
rename(yr = year) %>%
mutate(yr = as.character(yr)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-", wilid))
w_ht <- w_ht %>%
select(c(yr, season, site, site2, wilid, wilid_full, pl_ht_cm, plot))%>%
mutate(wilid = as.character(wilid))
## join in spp
# w_ht <- left_join(w_ht, wilid.spp.lu2)
# FALL 18 and 19 plus DCC
ht.combined <- dcc.16.17.18.19 %>%
mutate(pl_ht_cm = as.numeric(pl_ht_cm)) %>%
bind_rows(., w_ht) %>%
distinct()
## clean
ht.combined <- ht.combined %>%
mutate(site = case_when(site == "halibuff" ~ "hailbuff",
TRUE ~ site)) %>%
distinct()
#
# ht.combined %>%
# visdat::vis_miss()
ht.combined %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season) +
labs(caption = "Combined DCC, 16-19")
dcc.ht.fa.spr %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season) +
labs(title = "DCC data")
ht.combined <- ht.combined %>%
filter(pl_ht_cm < 700)
# ht.combined %>%
# distinct(site) %>%
# View()
ht.combined %>%
group_by(site2, season, yr) %>%
tally() %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n)) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season) +
labs(caption = "Combined DCC, 16-19")
### the missing spring dcc data
# note: this is supplanted by code that pulls in
# both fa and spr data from raw dcc files
# nsf.ht.spr <- nsf.ht.spr %>%
# mutate(yr = as.character(yr), wilid = as.character(wilid)) %>%
# mutate(season = "spring") %>%
# mutate(site2 = paste0(site,"-",plot))
# nsf.ht.spr <- left_join(nsf.ht.spr, plot.lu, by = "plot")
# nsf.ht.spr %>% filter(is.na(species))
nsf.ht.fa.expobs <- nsf.ht.fa.expobs %>%
mutate(yr = as.character(yr)) %>%
mutate(wilid = as.character(wilid)) %>%
mutate(wilid_full = paste0(site,"-",plot,"wilid")) %>%
mutate(site2 = paste0(site,"-",plot))
# ht.combined <- bind_rows(ht.combined, nsf.ht spring and fall)
ht.combined <- bind_rows(ht.combined, nsf.ht.fa.expobs) %>%
distinct()
## use species lookup table to
#
# ht.combined %>%
# distinct(species) %>%
# write_csv("./data/lu_species_code.csv")
# clean
ht.combined <- ht.combined %>%
# mutate(species = case_when(species == 'beb' ~ "bebb",
# species == 'drumm' ~ "drum",
# species == 'boothii' ~ "booth",
# species == 'boothi' ~ "booth",
# species == 'lassiandra' ~ "lass",
# TRUE ~ species)) %>%
# filter(species != "?") %>%
mutate(plot = if_else(is.na(plot),"obs",plot)) %>% # filter(!(wilid == "570" & pl_ht_cm == 283)) %>%
filter(!is.na(site)) %>%
mutate(site = if_else(site == "crescent","cresc", site)) %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid))
# visdat::vis_miss(ht.combined)
ht.combined %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
facet_wrap(~season)+
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)
# ## find missing data
ht.combined %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)
# ggsave("./output/output4qaqc/height_heatmap20191204 1937.pdf", width = 11, height = 8.5)
# w_ht %>%
# group_by(yr, season, site) %>%
# summarise(n= n()) %>%
# ggplot(aes(yr, site)) +
# geom_tile(aes(fill = n), color = 'black') +
# facet_wrap(~season)
#clean sites
ht.combined <- ht.combined %>%
mutate(site = case_when(site == "lb41" ~ "lb4",
site == 'lost c' ~ "lostc",
site == "lost l" ~ "lostl",
TRUE ~ site)) %>%
mutate(site = case_when(site == "halibuff" ~ "hailbuff",
site == "cresc" ~ 'crescent',
site == "crescent" ~ "crescent",
site == "lost l" ~ "lostl",
site == "lost c" ~ "lostc",
site == "xtal" ~ "crystal",
TRUE ~ site))
# ht.combined <- ht.combined %>%
# mutate(pl_ht_cm = round(pl_ht_cm,0))
ht.combined <- ht.combined %>%
# mutate(wilid = as.numeric(round(pl_ht_cm,0))) %>%
mutate(wilid = as.character(wilid))
ht.combined <- ht.combined %>%
filter(wilid != "0") %>%
distinct()
ht.combined <- ht.combined %>%
mutate(site = case_when(site == "hailbuff"~ "hailbuf",
TRUE ~ site))
ht.combined %>%
tabyl(site) %>%
datatable()
#
ht.combined %>%
group_by(yr, season, site2) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n), color = 'black') +
facet_wrap(~season) +
theme(axis.text.x = element_text(angle = 55, hjust = 1, size = 7)) +
theme(axis.text.y = element_text(size = 7)) +
facet_wrap(~season)
#### lu merge
# dcc.wilid.spp.lu
# wilid.spp.lu <- wilid.spp.lu %>%
# mutate(wilid_full = paste0(site, "-", plot, "-", wilid))
## create a lu with dcc raw and v1 wilid spp list
# wilid.spp.lu.2 <- bind_rows(wilid.spp.lu, dcc.wilid.spp.lu) %>%
# distinct()
# write_csv(wilid.spp.lu.2, "./data/wilid_spp._lu_v2.csv")
## attempt to get the 18 19 data back
ht.combined %>% visdat::vis_dat()
#######
sp1819 <- u18u19.ht %>%
# select(-c('file_name','exp',"dam","browse")) %>%
rename(species = spp) %>%
mutate(yr = as.character(yr)) %>%
# distinct(live_status)
filter(live_status == "live" | live_status == "live" | is.na(live_status)) %>%
select(-c(species,file_name))
## bind in the spring1819
ht.combined <- ht.combined %>%
bind_rows(., sp1819)
ht.combined <- ht.combined %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(wilid_full = paste0(site2,"-",wilid))
ht.combined %>%
visdat::vis_dat()
ht.combined %>%
# filter(plot != "obs") %>%
group_by(yr, season, site2) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = n), color = 'black') +
facet_wrap(~season)
# # type convert
# ht.combined.fin2 <- ht.combined.fin2 %>%
# mutate(exp = as.character(exp), dam = as.character(dam), browse = as.character(browse))
ht.combined <- ht.combined %>%
# filter(season == "spring") %>%
mutate(site_wilid = paste0(site,"-",wilid)) %>%
mutate(site = case_when(site == "lb41" ~ "lb4",
site == 'lost c' ~ "lostc",
site == "lost l" ~ "lostl",
TRUE ~ site))
ht.combined <- ht.combined %>%
select(yr, site, wilid, season, plot, pl_ht_cm, site2, wilid_full) %>%
distinct()
ht.combined %>%
filter(is.na(site))
## # A tibble: 3 x 8
## yr site wilid season plot pl_ht_cm site2 wilid_full
## <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 2019 <NA> 415 spring obs 174 NA-obs NA-obs-415
## 2 2019 <NA> 418 spring obs 116 NA-obs NA-obs-418
## 3 2019 <NA> 449 spring obs 97 NA-obs NA-obs-449
# ht.combined %>%
# distinct(site) %>% View()
# ht.combined <- ht.combined %>%
# # filter(season == "spring") %>%
# mutate(site = case_when(site == "lb41" ~ "lb4",
# site == 'lost c' ~ "lostc",
# site == "lost l" ~ "lostl",
# TRUE ~ site)) %>%
# select(yr, site, wilid, season, site, plot, pl_ht_cm, species)
# ht.combined.f3 <- bind_rows(ht.combined.fin2,ht.combined.final) %>%
# distinct()
# filter(plot != "obs") %>%
ht.combined %>%
# filter(plot == "obs") %>%
group_by(yr, season, site) %>%
summarise(n= n()) %>%
ggplot(aes(yr, site)) +
geom_tile(aes(fill = n), color = 'black') +
# facet_grid(season~yr) +
facet_wrap(~season)
ht.combined %>%
distinct(site, wilid, plot) %>%
arrange(wilid, site) %>%
datatable(filter = "top")
ht.combined <-
ht.combined %>%
filter(!is.na(wilid))
# filter(plot == 'obs' & site == "eb2") %>%
# filter(plot == 'obs' & site == "eb2")
ht.combined <- ht.combined %>%
mutate(plot = if_else(is.na(plot), "obs", plot))
ht.combined %>% visdat::vis_dat()
## more site name cleanup
ht.combined <- ht.combined %>%
mutate(site = case_when(site == "cresc" ~ "crescent",
site == "hailbuff" ~ "hailbuf",
TRUE ~ site))
## check site
# ht.combined.f3 %>%
# distinct(site) %>% View()
## Create site2
ht.combined <- ht.combined %>%
mutate(site2 = paste0(site,"-",plot))
# ht.combined.f3 <- ht.combined.f3 %>%
# filter(!is.na(site))
## Fa raw DCC. Note error in wilid=158, 2010
read_delim("./data/NSF_DataArchive20180208/Plant_level_fall_current_annual_growth_and_height_on_experimental_plot/willows-plantCAGfall2002-2015.txt", delim = " ") %>%
select(year, site, plot, species, willid, height) %>%
rename(yr = year, wilid = willid, pl_ht_cm = height) %>%
mutate(season = "fall") %>%
filter(wilid == "158")
## # A tibble: 13 x 7
## yr site plot species wilid pl_ht_cm season
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 2002 wb dx beb 158 127 fall
## 2 2003 wb dx beb 158 160 fall
## 3 2004 wb dx beb 158 180 fall
## 4 2005 wb dx beb 158 219 fall
## 5 2006 wb dx beb 158 240 fall
## 6 2007 wb dx beb 158 280 fall
## 7 2008 wb dx beb 158 298 fall
## 8 2009 wb dx beb 158 327 fall
## 9 2010 wb dx beb 158 1362 fall
## 10 2012 wb dx beb 158 382 fall
## 11 2013 wb dx beb 158 136 fall
## 12 2014 wb dx beb 158 364 fall
## 13 2015 wb dx beb 158 380 fall
# check specific records Tom identified
ht.combined %>%
distinct() %>%
filter (yr == '2010' & site == "wb") %>%
# datatable()
filter(wilid == "158")
## # A tibble: 3 x 8
## yr site wilid season plot pl_ht_cm site2 wilid_full
## <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr>
## 1 2010 wb 158 spring dx 338 wb-dx wb-dx-158
## 2 2010 wb 158 fall dx 362 wb-dx wb-dx-158
## 3 2010 wb 158 fall dx 1362 wb-dx wb-dx-158
Two issues here, now fixed: 1.plant height of 1362 would appear to be a data entry error in the raw file. This is 2010 data drawn from the DCC (library repository), so I don’t have the raw data to check. The value is in the raw file, not an artifact introduced in the data processing. I’ve changed it to “362”.
ht.combined %>%
distinct() %>%
filter (yr == '2010' & site == "wb") %>%
# datatable()
filter(wilid == "158") %>%
datatable(caption = "questionable records flagged by TH")
## correct the height
ht.combined <- ht.combined %>%
mutate(pl_ht_cm = if_else(pl_ht_cm == 1362, 362, pl_ht_cm)) %>%
mutate(pl_ht_cm = if_else(pl_ht_cm == 2877, 287, pl_ht_cm))
# these should be corrected in raw files?
1.Notice the different species: booth vs boothi, beb vs bebb. I ran code to find and replace such issues, but had it too early in the chain of transformations involved in the import and merging of the raw data. I have steps to remove duplicates, but b/c of the different species, they’re not the same.
## add full id to join with species
# wilid and site are not unique
ht.combined <- ht.combined %>%
mutate(wilid_full = paste0(site2,"-",wilid)) %>%
filter(!is.na(pl_ht_cm))
## cleaning
ht.combined <- ht.combined %>%
mutate(wilid_full = paste0(site2,"-",wilid))
# Eliminate species codes, find distinct, then join in the species from LU
ht.combined <- ht.combined %>%
distinct(yr, site, wilid, season, plot, pl_ht_cm, site2, wilid_full)
ht.combined <- ht.combined %>%
left_join(.,lu_spp_site2, by = "wilid_full")
ht.combined %>%
filter(is.na(species)) %>%
select(yr,season,wilid, wilid_full, pl_ht_cm) %>%
datatable(caption = "wilid_full lacking species. Manually check and update lookup table")
# manually correct missing sites in raw files and
# regenerate the full_id
ht.combined <- ht.combined %>%
mutate(site = case_when(wilid == "415" & is.na(site) ~ "ghole",
wilid == "418" & is.na(site) ~ "ghole",
wilid == "449" & is.na(site) ~ "glen",
wilid == "452" & is.na(site) ~ "glen",
TRUE ~ site)) %>%
mutate(site2 = paste0(site,"-", plot)) %>%
mutate(wilid_full = paste0(site2,"-", wilid))
## still issues with wilid decimal points
ht.combined <- ht.combined %>%
mutate(wilid = str_remove(.$wilid, ".0"))
## !!
ht.combined <- ht.combined %>%
mutate(wilid = as.numeric(wilid)) %>%
filter(!is.na(wilid)) %>%
filter(wilid != "0") %>%
mutate(wilid_full = paste0(site,"-",plot,"-",wilid)) %>%
select(-species) %>%
distinct()
# problem records: plot
ht.combined <- ht.combined %>%
mutate(plot = case_when(site == "crescent" & plot == "cc" ~ "obs",
TRUE ~ plot)) %>%
mutate(site2 = paste0(site,"-",plot)) %>%
mutate(wilid_full = paste0(site2,"-",wilid))
# mutate(plot = case_when(wilid_full == "crescent-cc-86" ~ "obs",
# TRUE ~ plot)) %>%
# filter(wilid_full == "crescent-cc-86") # was a problem
ht.combined %>% visdat::vis_dat()
ht.combined %>%
ggTile_yr_season_site2() +
labs(caption = "ht.combined")
## still missing species. Export records
# ht.combined %>%
# left_join(.,lu_spp_site2, by = "wilid_full") %>%
# filter(is.na(species)) %>%
# select(yr,season,wilid, wilid_full, pl_ht_cm) %>%
# distinct(wilid_full) %>%
# write_csv("./data/lu_spp_full_wilid_MANUALLY_figureout_and_add_wilid_full.csv")
ht.combined %>%
distinct(wilid) %>%
datatable(caption = "distinct wilid")
ht.combined %>%
distinct(wilid_full) %>%
datatable(caption = "distinct wilid_full")
ht.combined %>%
distinct(site) %>%
datatable(caption = "distinct site2")
##
ggTile_yr_season_site2(df = ht.combined)
## create site2 beaver occ lu
### export site 2 lu
# ht.combined.f3 %>%
# distinct(site, plot, site2) %>%
# write_csv("./data/lu_site_site2_beav.csv")
## manually update with LM input
# Beaver currently occupy crystal (2015?-2019) and wb4 (2017-2019).
#
# Lb4, lb5, and lb6 (2016/2017-2018), along with elk4 and elk5 (not sure of dates but not currently occupied).
# lu_site_beav <- read_csv("./data/lu_site_site2_beav.csv")
## plt join
# ## create lu for full id and spp
# ht.combined %>%
# left_join(., lu_wilid_full_spp) %>%
# distinct(wilid_full, species) %>%
# write_csv("./data/lu_wilid_full_spp.csv")
# !!!
## read in the updated
# lu_wilid_full_spp <- read_csv("./data/lu_wilid_full_spp.csv")
# ht.combined %>%
# visdat::vis_dat()
ht.combined <- ht.combined %>%
left_join(.,lu_wilid_full_spp)
ht.combined <- ht.combined %>%
left_join(.,plot.lu) %>%
select(-treat)
## join bv beav
ht.combined <- ht.combined %>%
left_join(., lu_site_beav)
ht.combined %>%
group_by(yr, site, plot) %>%
tally() %>%
ggplot(aes(yr,site)) +
geom_tile(aes(fill = n)) +
facet_wrap(~plot)
ht.combined %>%
# mutate(wilid = str_remove(.$wilid, ".0"))
filter(is.na(species))
## # A tibble: 2,096 x 18
## yr site wilid season plot pl_ht_cm site2 wilid_full species spp_notes
## <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <lgl>
## 1 2016 wb2 1 fall obs 239 wb2-~ wb2-obs-1 <NA> NA
## 2 2016 wb 2 fall cx 131 wb-cx wb-cx-2 <NA> NA
## 3 2016 wb 2 fall cx 376 wb-cx wb-cx-2 <NA> NA
## 4 2016 wb 7 fall cx 232 wb-cx wb-cx-7 <NA> NA
## 5 2016 wb 1 fall dc 254 wb-dc wb-dc-1 <NA> NA
## 6 2016 wb 1 fall cc 190 wb-cc wb-cc-1 <NA> NA
## 7 2016 wb 4 fall cc 108 wb-cc wb-cc-4 <NA> NA
## 8 2016 wb 7 fall cc 172 wb-cc wb-cc-7 <NA> NA
## 9 2016 wb 5 fall cc 70 wb-cc wb-cc-5 <NA> NA
## 10 2016 wb 3 fall dc 150 wb-dc wb-dc-3 <NA> NA
## # ... with 2,086 more rows, and 8 more variables: exp <chr>, dam <chr>,
## # browse <chr>, beav19 <chr>, beav18 <chr>, beav17 <chr>, beav16 <chr>,
## # beav15 <chr>
ht.combined %>% visdat::vis_dat()
ht.combined %>%
write_csv("./output/processed_data/ht_combined20191219_0126.csv")
## summarize by site2 and year using skimr
summary.by.site2 <- ht.combined %>%
# filter(wilid == "924") %>%
filter(!is.na(site)) %>%
mutate(year = as.factor(yr)) %>%
group_by(site, plot, site2,yr) %>%
nest() %>%
mutate(skim2 = map(data,skim)) %>%
unnest(skim2) %>%
ungroup()
## line plot
summary.by.site2 %>%
mutate(yr = as.numeric(yr)) %>%
filter(skim_variable == 'pl_ht_cm') %>%
filter(plot != "obs") %>%
# filter(!(site2 == "elk-dx" & yr == 2019)) %>%
ggplot(aes(yr)) +
# geom_ribbon(aes(ymin = numeric.p25, ymax = numeric.p75, fill = treat), alpha = 0.21) +
geom_line(aes(y=numeric.p50, color = plot)) +
geom_point(aes(y=numeric.p50, color = plot)) +
geom_pointrange(aes(y = numeric.p50, ymin = numeric.p25, ymax = numeric.p75, color = plot), alpha = 0.31) +
facet_wrap(~site) +
theme_minimal() +
labs(y="Median willow height (cm)", x = "Year", caption = "Willow height at experimental sites")
# ggsave("./output/median_willow_height_20191205_0924.png", width = 7, height = 5.5)
## working on this...
# summary.by.site2 %>%
# filter(contains(match = ""))
# filter(stat == "n_unique") %>%
# ggplot(aes(variable,value)) +
# geom_point() +
# theme(axis.text.x = element_text(angle=45, hjust=1, face="bold", color="black", size=10)) +
# facet_wrap(~site2, ncol=4)
##
summary.by.site2 %>%
select(yr, plot, site2, skim_variable, contains("numeric")) %>%
filter(!(site2 == "elk-dx" & yr == 2019)) %>%
filter(skim_variable == "pl_ht_cm") %>%
filter(plot != "obs") %>%
ggplot(aes(yr, site2)) +
geom_tile(aes(fill = numeric.mean), color = 'gray70') +
scale_fill_viridis() +
theme_light() +
labs(x = "Year", y = "", subtitle = "Mean willow height", fill = "Height (cm)", caption = getwd())
# ggsave("./output/median_willow_height_HM_20191205_0924.png", width = 7, height = 7)
summary.by.site2 %>%
select(yr, plot, site2, skim_variable, contains("numeric")) %>%
filter(!(site2 == "elk-dx" & yr == 2019)) %>%
filter(skim_variable == "pl_ht_cm") %>%
filter(plot != "obs") %>%
ggplot(aes(yr, plot)) +
geom_tile(aes(fill = numeric.mean)) +
scale_fill_viridis() +
theme_light() +
facet_wrap(~site2) +
labs(x = "Year", y = "", subtitle = "Mean willow height", fill = "Height (cm)", caption = getwd())
# ggsave("./output/median_willow_height_HM_20191205_0924.png", width = 7, height = 7)
## create species lookup
# ht.combined %>%
# mutate(species = case_when(species == 'beb' ~ "bebb",
# species == 'drumm' ~ "drum",
# species == 'boothii' ~ "booth",
# species == 'boothi' ~ "booth",
# species == 'lassiandra' ~ "lass",
# species == 'plantifolia' ~ "plan",
# species == 'planifolia' ~ "plan",
#
# TRUE ~ species)) %>%
# # filter(is.na(species))
# # filter(species == "na")
# distinct(site2, wilid, species) %>%
# filter(!is.na(species)) %>%
# filter(species != "na") %>%
# mutate(wilid_full = paste0(site2,"-",wilid)) %>%
# select(wilid_full, species) %>%
# write_csv("./data/lu_spp_site2.csv")
The metadata csv included in the DCC repository does not include all the variables.
In the DCC data, “browse = 0” indicates experimental exclosure treatment. For example, see willid 1 in EB1. The plot type is listed in the 2018 dataset as “dx”. In the 2001-2017 data, it’s dam == 1, browse == 0
In the supplemental material accompanying the 2013 Marshall et al. ProcRoySoc paper, the willid are attributed with “dammed” and “UNbrowsed”. This is confusing, since for “dam”, 1 is the experimental treatment, but for “browse”, 1 is the control.
#### old
## Import 2018 Data
prod.path <- "data/raw/production/Exp_2018_Production_Height_20181220.xlsx"
# the following seems to work for taking all of the tabs, importing them into a single tibble
# B/c of parsing errors, everything is brought in as char, so need to type-fix.
prod18.df <- prod.path %>%
excel_sheets() %>%
set_names() %>%
map(read_excel, path = prod.path, skip = 1, col_types = 'text') %>%
map(clean_names) %>%
map_df(.f = bind_rows, .id = "FNAME")
# I could theoretically type set on import through the 'col_types' arg, BUT some of the tabs have 126 columns, some 127... Not sure which columns differ. A different puzzle to solve at some point :)
## plot the count of observations
w_ht01_17 %>%
mutate(year = as.factor(year)) %>%
group_by(season, year) %>%
summarise(cnt = n()) %>%
ggplot(aes(year, cnt)) +
geom_bar(stat = 'identity', aes(fill = season), position = "dodge", color = "black") +
theme_bw()
# There's only two years (01,17) with Spring measurements...