Figure | Flowchart of potential pathways for using compasstools to process sapflow sensor data

Raw file

fn <- system.file("PNNL_11_sapflow_1min.dat", package = "compasstools")
dat_raw <- readLines(fn)
kable(dat_raw) %>% scroll_box(width = "100%")
x
"TOA5","PNNL_11","CR1000X","29517","CR1000X.Std.05.01","CPU:Tempest_v5_8_1_21.CR1X","45715","sapflow_1min"
"TIMESTAMP","RECORD","Statname","BattV_Avg","DiffVolt(1)","DiffVolt(2)","DiffVolt(3)","DiffVolt(4)","DiffVolt(5)","DiffVolt(6)","DiffVolt(7)","DiffVolt(8)","DiffVolt(9)","DiffVolt(10)","DiffVolt(11)","DiffVolt(12)","DiffVolt(13)","DiffVolt(14)"
"TS","RN","","Volts","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV","mV"
"","","Smp","Avg","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp","Smp"
"2022-06-14 15:36:00",0,"NAN",13.66,0.59,0.605,0.711,0.376,0.448,0.354,0.701,"NAN",0,0,0,0,0,0
"2022-06-14 15:37:00",1,"NAN",13.66,0.59,0.604,0.711,0.376,0.449,0.354,0.7,"NAN",0,0,0,0,0,0
"2022-06-14 15:38:00",2,"NAN",13.66,0.59,0.604,0.71,0.376,0.449,0.354,0.7,"NAN",0,0,0,0,0,0
"2022-06-14 15:39:00",3,"NAN",13.67,0.59,0.603,0.711,0.376,0.449,0.354,0.7,"NAN",0,0,0,0,0,0
"2022-06-14 15:40:00",4,"NAN",13.66,0.59,0.603,0.71,0.376,0.449,0.354,0.7,"NAN",0,0,0,0,0,0

Functions

read_sapflow_file()

The read_sapflow_file function reads a raw sapflow file, returning it as a data frame with minimal processing (the logger ID is extracted from the header and added as a column, but nothing else).

Example

dat1 <- read_sapflow_file(fn)
kable(dat1) %>% kable_styling("striped") %>% scroll_box(width = "100%")
Timestamp Record Statname BattV_Avg DiffVolt_Avg(1) DiffVolt_Avg(2) DiffVolt_Avg(3) DiffVolt_Avg(4) DiffVolt_Avg(5) DiffVolt_Avg(6) DiffVolt_Avg(7) DiffVolt_Avg(8) DiffVolt_Avg(9) DiffVolt_Avg(10) DiffVolt_Avg(11) DiffVolt_Avg(12) DiffVolt_Avg(13) DiffVolt_Avg(14) Logger
2022-06-14 15:36:00 0 NaN 13.66 0.59 0.605 0.711 0.376 0.448 0.354 0.701 NaN 0 0 0 0 0 0 PNNL_11
2022-06-14 15:37:00 1 NaN 13.66 0.59 0.604 0.711 0.376 0.449 0.354 0.700 NaN 0 0 0 0 0 0 PNNL_11
2022-06-14 15:38:00 2 NaN 13.66 0.59 0.604 0.710 0.376 0.449 0.354 0.700 NaN 0 0 0 0 0 0 PNNL_11
2022-06-14 15:39:00 3 NaN 13.67 0.59 0.603 0.711 0.376 0.449 0.354 0.700 NaN 0 0 0 0 0 0 PNNL_11
2022-06-14 15:40:00 4 NaN 13.66 0.59 0.603 0.710 0.376 0.449 0.354 0.700 NaN 0 0 0 0 0 0 PNNL_11

Function Code

print(read_sapflow_file)
## function (filename, min_timestamp = NULL, quiet = FALSE) 
## {
##     skip <- calculate_skip(filename, header_rows = 4, min_timestamp, 
##         quiet = quiet)
##     if (skip == -1) 
##         return(tibble())
##     dat_header <- read_lines(filename, n_max = 1)
##     pnnl_x <- gregexpr("PNNL_", dat_header[1])[[1]][1]
##     logger_name <- substr(dat_header[1], start = pnnl_x, stop = pnnl_x + 
##         6)
##     x <- read_csv(filename, skip = skip + 4, col_names = c("Timestamp", 
##         "Record", "Statname", "BattV_Avg", paste0("DiffVolt_Avg(", 
##             1:14, ")"), paste0("DiffVolt(", 1:14, ")")), col_types = paste0("c", 
##         strrep("d", 31)))
##     x$Logger <- logger_name
##     x
## }
## <bytecode: 0x7f86f2902d58>
## <environment: namespace:compasstools>

process_sapflow_dir()

The process_sapflow_dir function reads a folder of one or more raw sapflow files, returning them concatenated into a single data frame. The “Timestamp” column is parsed into a ‘real’ timestamp (i.e., a POSIXct object with a time zone), and the data are reshaped into a long form with one observation per row.

Example

td <- tempdir()
newfile <- file.path(td, "sapflow.dat")
writeLines(dat_raw, con = newfile)
dat2 <- process_sapflow_dir(td, tz = "EST")
kable(head(dat2)) %>% kable_styling("striped") %>% scroll_box(width = "100%")
Timestamp Record Statname BattV_Avg Logger Port Value
2022-06-14 15:36:00 0 NaN 13.66 11 1 0.590
2022-06-14 15:36:00 0 NaN 13.66 11 2 0.605
2022-06-14 15:36:00 0 NaN 13.66 11 3 0.711
2022-06-14 15:36:00 0 NaN 13.66 11 4 0.376
2022-06-14 15:36:00 0 NaN 13.66 11 5 0.448
2022-06-14 15:36:00 0 NaN 13.66 11 6 0.354

Function Code

print(read_sapflow_file)
## function (filename, min_timestamp = NULL, quiet = FALSE) 
## {
##     skip <- calculate_skip(filename, header_rows = 4, min_timestamp, 
##         quiet = quiet)
##     if (skip == -1) 
##         return(tibble())
##     dat_header <- read_lines(filename, n_max = 1)
##     pnnl_x <- gregexpr("PNNL_", dat_header[1])[[1]][1]
##     logger_name <- substr(dat_header[1], start = pnnl_x, stop = pnnl_x + 
##         6)
##     x <- read_csv(filename, skip = skip + 4, col_names = c("Timestamp", 
##         "Record", "Statname", "BattV_Avg", paste0("DiffVolt_Avg(", 
##             1:14, ")"), paste0("DiffVolt(", 1:14, ")")), col_types = paste0("c", 
##         strrep("d", 31)))
##     x$Logger <- logger_name
##     x
## }
## <bytecode: 0x7f86f2902d58>
## <environment: namespace:compasstools>

Joining with Metadata

We have an inventory file that includes site and location information about the sensors…

inventory <- read_csv("sapflow_inventory copy.csv")

DT::datatable(inventory)

… which is then joined with our data frame to produce a Level 0 dataset.

sapflow_l0 <- dat2 %>% left_join(inventory, by = c("Logger", "Port"))
kable(head(sapflow_l0)) %>% kable_styling("striped") %>% scroll_box(width = "100%")
Timestamp Record Statname BattV_Avg Logger Port Value Tree_Code Species Tag PakBus Grid_Square Installation_Date
2022-06-14 15:36:00 0 NaN 13.66 11 1 0.590 C7 ACRU 1439 41 B3 3/19/21
2022-06-14 15:36:00 0 NaN 13.66 11 2 0.605 C8 ACRU 1436 41 C4 3/19/21
2022-06-14 15:36:00 0 NaN 13.66 11 3 0.711 C10 ACRU 1387 41 D4 3/17/21
2022-06-14 15:36:00 0 NaN 13.66 11 4 0.376 C11 ACRU 1395 41 F2 3/17/21
2022-06-14 15:36:00 0 NaN 13.66 11 5 0.448 C16 FAGR 1390 41 E4 3/17/21
2022-06-14 15:36:00 0 NaN 13.66 11 6 0.354 C18 FAGR 1452 41 NA 3/17/21