First we load our packages, build a list of the raw data files we want to work with, and define a function to read in and reshape each file.

knitr::opts_chunk$set(message = FALSE, warning = FALSE)
pacman::p_load(readr, dplyr, tidyr, ggplot2, plotly, lubridate)

DATA_BEGIN <- as_datetime("2022-06-20 00:00:00", tz = "EST")
DATA_END <- as_datetime("2022-07-22 23:59:59", tz = "EST")

EVENT_START <- as_datetime("2022-06-22 05:30:00", tz = "EST")
EVENT_STOP <- as_datetime("2022-06-22 14:30:00", tz = "EST")

theme_set(theme_minimal())

sf_inventory <- read_csv("../data/sapflow_inventory copy.csv")
## Rows: 87 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Tree_Code, Species, Grid_Square, Installation_Date
## dbl (4): Port, Tag, PakBus, Logger
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sapflow_files <- list.files(path = "../data/raw/sapflow", full.names = TRUE)

process_sapflow <- function(path) {
  # Skip the program-info line at the top of each logger file,
  # then pull the logger number out of the file name
  df <- read_csv(path, skip = 1) %>% 
    mutate(File_Name = basename(path)) %>% 
    separate(File_Name, into = c("one", "Logger")) %>% 
    select(-one)
  
  # Drop the first two rows, which hold metadata rather than measurements
  df <- df[-1:-2, ]
  
  # Reshape the per-port differential-voltage columns into long format
  df %>% 
    pivot_longer(cols = contains("DiffVolt_Avg"), 
                 names_to = "Port", values_to = "Value") %>% 
    mutate(Port = parse_number(Port)) %>% 
    select(TIMESTAMP, Logger, Port, Value, BattV_Avg)
}
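
For example, reading a single file should return a long tibble with one row per timestamp-port pair (columns are still character at this point; we convert types below):

head(process_sapflow(sapflow_files[1]))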

Then we use lapply to apply our function to every data file at once and bind the results into a single data frame.

sapflow_raw <- lapply(sapflow_files, process_sapflow) %>% 
  bind_rows()
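
If you prefer purrr, purrr::map_dfr(sapflow_files, process_sapflow) is an equivalent one-liner; purrr isn't in the p_load() call above, so it would need adding there first.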

sf_dat <- sapflow_raw %>% 
  mutate(Timestamp = ymd_hms(TIMESTAMP, tz = "EST"), 
         Value = as.numeric(Value),
         Logger = as.numeric(Logger),
         Date = date(Timestamp),
         Hour = hour(Timestamp)) %>% 
  # Attach tree metadata (species, grid square, etc.) from the inventory
  left_join(sf_inventory, by = c("Logger", "Port")) %>% 
  # Keep only the study window, values above the noise floor, and trees
  # whose code doesn't contain "D"
  filter(Timestamp >= DATA_BEGIN, Timestamp <= DATA_END, 
         Value > 0.02, !grepl("D", Tree_Code)) %>% 
  # Filter visually bad data; NA_real_ keeps case_when's output numeric,
  # and explicit tz = "EST" avoids comparing against session-local times
  mutate(Value = case_when(date(Timestamp) == "2022-07-09" ~ NA_real_,
                           date(Timestamp) == "2022-06-27" ~ NA_real_,
                           Tree_Code == "S2" ~ NA_real_,
                           Tree_Code == "S4" ~ NA_real_,
                           Tree_Code == "S8" ~ NA_real_,
                           Tree_Code == "S9" & 
                             Timestamp > ymd_hms("2022-07-16 16:15:00", tz = "EST") & 
                             Timestamp < ymd_hms("2022-07-17 13:30:00", tz = "EST") ~ NA_real_,
                           Tree_Code == "C2" & 
                             Timestamp > ymd_hms("2022-06-27 00:00:00", tz = "EST") & 
                             Timestamp < ymd_hms("2022-06-29 23:59:59", tz = "EST") ~ NA_real_,
                           Tree_Code == "C16" ~ NA_real_,
                           Tree_Code == "F9" ~ NA_real_,
                           TRUE ~ Value)) %>%
  # Drop ports with no matching tree in the inventory
  drop_na(Tree_Code) %>% 
  select(Timestamp, Date, Hour, Logger, Tree_Code, Species, Grid_Square, Port, Value) %>% 
  # The first letter of Tree_Code encodes the treatment plot
  mutate(Plot = substr(Tree_Code, 1, 1),
         Plot = case_when(Plot == "C" ~ "Control",
                          Plot == "S" ~ "Saltwater",
                          Plot == "F" ~ "Freshwater"))

For dTmax, we take the nightly maximum of the probe signal: for each probe and day, the dTmax reading needs to meet the following criteria:

  1. Fall between the hours of midnight and 5am, when sap flow is near zero

  2. Be the maximum ΔT (Value) recorded in that window

sapflow_dtmax <- sf_dat %>% 
  # Restrict to the predawn window; drop NAs so probe-days with no
  # valid readings don't break max() and which.max()
  filter(Hour >= 0, Hour <= 5, !is.na(Value)) %>% 
  group_by(Date, Plot, Logger, Port) %>% 
  summarise(dTmax = max(Value), 
            dTmax_Timestamp = Timestamp[which.max(Value)], 
            .groups = "drop")
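
As a first sanity check, every dTmax timestamp should fall inside the predawn window:

sapflow_dtmax %>% 
  count(Hour = hour(dTmax_Timestamp))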

Let’s graph the sapflow data with dTmax overlaid to make sure we calculated it correctly!

sf_dat %>% 
  filter(Plot == "Control") %>% 
  ggplot(aes(x = Timestamp, y = Value, group = Port, color = as.factor(Port))) + 
  geom_line() + 
  geom_point(data = filter(sapflow_dtmax, Plot == "Control"), 
             aes(x = dTmax_Timestamp, y = dTmax), color = "black") + 
  facet_wrap(~Logger, scales = "free") 

Now that we know dTmax is correct, we can calculate Fd (sap flux density, m3/m2/s) using the Granier (1985) equation:

\[ F_{d} = \alpha K^{b} \]

\[ F_{d} = 118.99 \cdot 10^{-6} \cdot \left(\frac{\Delta T_{max}}{\Delta T} - 1\right)^{1.231} \]

where \( K = \frac{\Delta T_{max}}{\Delta T} - 1 \), \( \alpha = 118.99 \cdot 10^{-6} \), and \( b = 1.231 \). To convert Fd from m3/m2/s to g/cm2/hr, multiply by 360,000:

\[ F_{d}\ [\mathrm{g \cdot cm^{-2} \cdot hr^{-1}}] = 360{,}000 \cdot F_{d}\ [\mathrm{m^{3} \cdot m^{-2} \cdot s^{-1}}] \]
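
As a worked example with made-up probe readings (dTmax = 0.80, dT = 0.65; the units cancel in the ratio):

K <- 0.80 / 0.65 - 1          # ~0.231
Fd <- 118.99e-6 * K^1.231     # ~1.96e-5 m3/m2/s
Fd * 360000                   # ~7.0 g/cm2/hr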

sfd_data <- sf_dat %>% 
  # Match each reading to its probe's dTmax for that day
  left_join(sapflow_dtmax, by = c("Date", "Plot", "Port", "Logger")) %>% 
  # Granier equation, with the 360,000 factor converting to g/cm2/hr
  mutate(Fd = 360000 * 0.00011899 * ((dTmax / Value) - 1)^1.231)
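
Note that K goes negative wherever a reading exceeds that day's dTmax, and a negative base raised to a fractional exponent returns NaN, so it's worth counting how often that happens:

sfd_data %>% 
  summarise(n_total = sum(!is.na(Value)),
            n_nan_Fd = sum(is.nan(Fd)))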

The conversion to g/cm2/hr is already built into the calculation above, so now let’s graph Fd to check.

Sap Flux Density

ggplot(sfd_data, aes(x = Timestamp, y = Fd, group = Tree_Code)) + 
  annotate("rect", xmin = EVENT_START, xmax = EVENT_STOP, 
           ymin = -Inf, ymax = Inf, alpha = 0.6, fill = "lightblue") +
  geom_line() +
  facet_wrap(~Plot, ncol = 1) + 
  ylab(expression(paste("Sap Flux Density (", F[d], ", g \u2022 ", cm^{-2}, " \u2022 ", hr^{-1}, ")"))) +
  theme(legend.position = "bottom")
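
Because plotly is loaded up top, an interactive version of the last figure (handy for zooming in on suspect traces) is one call away:

ggplotly()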

Hourly-averaged by plot

sfd_plot_avg <- sfd_data %>% 
  # Round each timestamp down to the hour, then average across all trees in a plot
  group_by(Plot, Timestamp = floor_date(Timestamp, "hour")) %>% 
  summarise(Fd_avg = mean(Fd, na.rm = TRUE), .groups = "drop")

ggplot(sfd_plot_avg) + 
  annotate("rect", xmin = EVENT_START, xmax = EVENT_STOP, 
           ymin = -Inf, ymax = Inf, alpha = 0.6, fill = "lightblue") +
  geom_line(aes(x = Timestamp, y = Fd_avg, color = Plot)) + 
  facet_wrap(~Plot, ncol = 1) +
  # Timestamp is POSIXct, so use the datetime scale rather than scale_x_continuous()
  scale_x_datetime(breaks = pretty(sfd_plot_avg$Timestamp, n = 5)) +
  ylab(expression(paste("Sap Flux Density (", F[d], ", g \u2022 ", cm^{-2}, " \u2022 ", hr^{-1}, ")")))