Saturo Calculations & Data Validation

# Still to address
#1. Water level standard deviation threshold
#2. Rounding
#3. NASIS uploader sheet

1. Introduction

Output excel files from Saturo runs contain a calculated field-saturated hydraulic conductivity (Kfs) and it’s associated error (Kfs error), although, these values are based on the last cycle of any run. Paying attention to the live data outputs on the Saturo control unit in the field may is important during a run to avoid wasting time and water. However, in some cases, the last run can still contain noisy or irregular data due to unavoidable issues. Kfs and Kfs error from a previous cycle may be able to be substituted for the last cycle.

This document provides an explanation of how to use the R script to manually calculate Kfs and Kfs error for every cycle during a Saturo run. The code below is broken up into sections to make clear what each portion of the code is doing.

# Yellow highlighted box means you need to manually change something within that section of the code yourself.

2. Load libraries

Ensure that you have these packages installed prior to running the code.

library(tidyverse)
library(readxl)
#install.packages('formattable')
library(formattable)

3. Bring in your data

Change the path directory to where the file is stored (file_path)
Change the name of the file to the one of interest (file_name)

# Change directory to the file you want to analyze
file_path = "C:/Users/Amanda.Pennino/OneDrive - USDA/Documents/methods/Saturo/data/"
file_name = "Run1.xlsx"

4. Calculations

# Bring in your data
dat <- read_excel(paste0(file_path, file_name), sheet = "Raw Data")


# Create summary of run settings
## There will be a warning message that shows regarding column data types
summary <- read_excel(paste0(file_path, file_name), 
                      sheet = "Summary",
                      col_names = c("Settings", "Value"),
                      skip = 2,
                      col_types = c("guess", "numeric"))



# Reformat summary to wide format
summary <- summary %>% 
  drop_na(Settings) %>%
  spread(key = Settings, value = Value)

# Extract out units of measurement
## This is set on the control unit prior to the run
units <- data.frame(names = names(summary)) %>% 
  mutate(units = str_extract(.[[1]], "(?<=\\()([^()]*?)(?=\\)[^()]*$)")) %>%
  mutate(names = str_replace(.[[1]], " \\s*\\([^\\)]+\\)", "")) 

units2 <- data.frame(names = names(dat)) %>%
  mutate(units = str_extract(.[[1]], "(?<=\\()([^()]*?)(?=\\)[^()]*$)")) %>%
  mutate(names = str_replace(.[[1]], " \\s*\\([^\\)]+\\)", "")) 

units <- rbind(units, units2)

#Change column names in the data sheet to take out units of measurement
names(dat) <- units2[[1]]
rm(units2) #remove data frame


# Extract variables for calculations
presoak <- summary[[1,16]]
hold <- summary[[1,5]]
depth <- summary[[1,7]] # Ring depth
cycles <- summary[[1,10]] 
radius <- 7.5  # Ring radius
delta <- round((0.993 * depth) + (0.578 * radius), digits = 1)  # Calculate delta (ring geometry/shape factor)


# Determine high/low pressure periods
dat <- dat %>%
  mutate(setting = ifelse(Time <= presoak, "presoak", 
                          ifelse(Pressure > 6, "high", 
                                 ifelse(Pressure < 6, "low", NA) ))) 

nhigh <- as.numeric(nrow(dat[dat$setting == "high",])) 
nlow <- as.numeric(nrow(dat[dat$setting == "low",]))


# Add cycle numbers
dat <- dat %>%
  group_by(setting) %>%
  mutate(cycle = ifelse(setting == "presoak", 0,
                        ifelse(setting == "high", rep(c(1:cycles), each = nhigh/cycles, length.out = n()), 
                               ifelse(setting == "low", rep(c(1:cycles), each = nlow/cycles, length.out = n()), NA)))) %>%
  ungroup() %>%
  group_by(cycle, setting) %>%
  slice(3:n()) #takes out the first 2 minutes of each pressure setting for every cycle


# Calculate the Kfs & Kfs error for each cycle
## Kfs
Kfs <- dat %>%
  filter(setting != "presoak") %>%
  group_by(setting, cycle) %>%
  summarise(mFlux = mean(Flux),
            mPress = mean(Pressure)) %>%
  pivot_wider(names_from = setting, values_from = c(mFlux, mPress)) %>%
  mutate(Kfs = (delta * (mFlux_high - mFlux_low)) / (mPress_high - mPress_low)) %>%
  select(cycle, Kfs)


## Kfs error
high <- dat %>%
  filter(setting == "high") %>%
  group_by(setting) %>%
  rename(highP = Pressure,
         highQ = Flux) %>%
  ungroup()%>%
  select(highP, highQ, cycle)
  
low <- dat %>%
  filter(setting == "low") %>%
  group_by(setting) %>%
  rename(lowP = Pressure,
         lowQ = Flux) %>%
  ungroup()%>%
  select(lowP, lowQ)

dat2 <- cbind(high, low)
dat2 <- left_join(dat2, Kfs, by = "cycle")

Kfs_error <- dat2 %>%
  mutate(row_Kfs = (delta * (highQ - lowQ)) / (highP-lowP),
         row_error = (row_Kfs - Kfs)^2 ) %>%
  group_by(cycle) %>%
  summarise(Kfs_error = sqrt(sum(row_error)) / (hold - 2) )

Kfs_data <- left_join(Kfs, Kfs_error, by = "cycle")


## Standard Deviation of water level
sd_waterlevel <- dat %>%
  filter(cycle != 0) %>%
  group_by(cycle) %>%
  summarise(sd = sd(`Water Level`))

Kfs_data <- left_join(Kfs_data, sd_waterlevel, by = "cycle")

5. View the calculated results by cycle

# Highlight suspect cells by test
## Test 1: Is the standard error at least one order of magnitude or smaller than the Kfs value?

Kfs_data$mag_check <- (Kfs_data$Kfs / Kfs_data$Kfs_error) >= 10

## Test 3: Did the Water level maintain at 5cm? 
### What should the threshold be?
sd_threshold = 0.1 #cm
Kfs_data$level_check <- Kfs_data$sd <= sd_threshold

# Rename columns
names(Kfs_data) <- c("Cycle", 
                     paste0("Kfs (", units$units[units$names == "Kfs"], ")"), 
                     paste0("Kfs Error (", units$units[units$names == "Kfs Error"], ")"),
                     paste0("Water Level SD (", units$units[units$names == "Water Level"], ")"),
                     "Error Test", 
                     "Water Level Test")

# Change column order
Kfs_data <- Kfs_data[,c(1,4,3,2,5,6)]

# Change view of data to scientific notation with 3 significant digits
## Preserve original table first
Kfs_table <- Kfs_data

Kfs_table[[3]] <- formatC(Kfs_table[[3]], format = "e", 2)
Kfs_table[[4]] <- formatC(Kfs_table[[4]], format = "e", 2)
Kfs_table[[2]] <- signif(Kfs_table[[2]], 2)

#Create chart with tests

t <- formattable(Kfs_table,
  align = c("l",rep("r", NCOL(Kfs_table) - 1)),
  list("Cycle" = formatter("span", style = ~ style(color = "grey", font.weight = "bold")),
  `Error Test` = formatter("span",
  x ~ icontext(ifelse(x == "TRUE", "ok", "remove"), ifelse(x == "TRUE", "Yes", "No")),
  style = x ~ style(color = ifelse(x != "TRUE", "red", "green"))),
  `Water Level Test` = formatter("span",
  x ~ icontext(ifelse(x == "TRUE", "ok", "remove"), ifelse(x == "TRUE", "Yes", "No")),
  style = x ~ style(color = ifelse(x != "TRUE", "red", "green")))))

t

Cycle	Water Level SD (cm)	Kfs Error (cm/s)	Kfs (cm/s)	Error Test	Water Level Test
1	0.0051	7.34e-05	1.85e-03	Yes	Yes
2	0.0050	3.93e-05	1.49e-03	Yes	Yes
3	0.0054	5.73e-05	1.45e-03	Yes	Yes

6. Visually inspect data

# Put vlines denoting cycle? 
#Try to do a facet wrap to plot all? To reduce the need for plotly

#BRING IN RAW DATA 
raw <- read_excel(paste0(file_path, file_name), sheet = "Raw Data")
names(raw)[2] <- "Time"

raw_long <- raw %>%
  ungroup() %>%
  select(-`Record ID`) %>%
  pivot_longer(!Time, names_to = "metric", values_to = "value")

p <- raw_long %>%
  ggplot(aes(Time, value)) +
  geom_line(aes(color = metric), size = 1) +
  facet_grid(metric~., scales = "free_y", switch = "y") +
  ylab(" ") +
  scale_colour_brewer(type = "div") +
  theme_bw()+
  theme(strip.text.y = element_text(size = 10, hjust = 0.5,
          vjust = 0.5, face = 'bold')) +
  theme(legend.title=element_blank())

p

7. Read out your results

This will be saved in the same location as where the original datasheet was.

#Export data as a .csv file
write.csv(Kfs_data, paste0(file_path, "Results_", file_name))