Initializing

I see 13 data files to process.

I see 1 metadata files to process.

Measurement length is 300 seconds.

Default dead band time is 10 seconds.

Required metafields are: Date, Time_start, Plot, Area, Volume, Temp.

Writing output to /Users/d3x290/Desktop/Code/sw/output.

HTML outfile is ghg-processor.html.

Working directory is /Users/d3x290/Desktop/Code/sw.

Data

Read in GHG data

Code

errors <- 0 # error count

# Function to read a data file given by 'fn', filename
# Returns NULL if there's an error reading
read_ghg_data <- function(fn) {
  basefn <- basename(fn)
  message(Sys.time(), " Processing ", basefn)
  
  dat_raw <- readLines(fn)
  # Save the serial number in case we want to look at machine differences later
  sn <- trimws(gsub("SN:\t", "", dat_raw[2], fixed = TRUE))
  # Parse the timezone from the header and use it to make a TIMESTAMP field
  tz <- trimws(gsub("Timezone:\t", "", dat_raw[5], fixed = TRUE))
  
  # These files have five header lines, then the names of the columns in line 6,
  # and then the column units in line 7. We only want the names
  dat_raw <- dat_raw[-c(1:5, 7)]
  # Irritatingly, the units line can repeat in the file (???!?). Remove these
  dat_raw <- dat_raw[grep("DATAU", dat_raw, invert = TRUE)]
  # Double irritatingly, if there's no remark, the software write \t\t, not
  # \t""\t, causing a read error. Replace these instances
  dat_raw <- gsub("\t\t", "\tnan\t", dat_raw, fixed = TRUE)
  dat <- try({
    readr::read_table(I(dat_raw), na = "nan", guess_max = 1e4)
  })
  
  # If the try() above succeeded, we haev a data frame and can process it 
  if(is.data.frame(dat)) {
    message("\tRead in ", nrow(dat), " rows of data, ", 
            min(dat$DATE), " to ", max(dat$DATE))
    message("\tInstrument serial number: ", sn)
    dat$SN <- sn
    message("\tInstrument time zone: ", tz)
    # We record the time zone but don't convert the timestamps to it as
    # that's not needed right now; metadata time is guaranteed to be same
    dat$TIMESTAMP <- lubridate::ymd_hms(paste(dat$DATE, dat$TIME))
    dat$TZ <- tz
    # Remove unneeded Licor DATE and TIME columns
    dat$DATE <- dat$TIME <- NULL
    dat$Data_file <- basefn
    return(dat)
  } else {
    warning("File read error for ", basefn)
    errors <<- errors + 1
    return(NULL)
  }
}

dat <- lapply(data_files, read_ghg_data)

2023-02-21 08:39:46 Processing COMPASS_June2022_Licor_Data.txt

    Read in 61170 rows of data, 2022-06-06 to 2022-06-10

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:47 Processing TG10-01087-2022-07-11T060000.data.txt

    Read in 60697 rows of data, 2022-07-11 to 2022-07-14

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:48 Processing TG10-01087-2022-08-15T070000.data.txt

    Read in 15978 rows of data, 2022-08-15 to 2022-08-15

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:49 Processing TG10-01087-2022-08-18T070000.data.txt

    Read in 13984 rows of data, 2022-08-18 to 2022-08-18

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:49 Processing TG10-01087-2022-08-31T010000.data.txt

    Read in 17285 rows of data, 2022-08-31 to 2022-08-31

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:50 Processing TG10-01087-2022-09-18T060000.data.txt

    Read in 8557 rows of data, 2022-09-18 to 2022-09-18

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:50 Processing TG10-01087-2022-09-22T070000.data.txt

    Read in 16247 rows of data, 2022-09-22 to 2022-09-22

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:51 Processing TG10-01087-2022-10-11T060000.data.txt

    Read in 12379 rows of data, 2022-10-11 to 2022-10-11

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:51 Processing TG10-01087-2022-10-14T070000.data.txt

    Read in 14142 rows of data, 2022-10-14 to 2022-10-14

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:51 Processing TG10-01087-2022-10-27T050000.data.txt

    Read in 14812 rows of data, 2022-10-27 to 2022-10-27

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:52 Processing TG10-01087-2022-11-13T060000.data.txt

    Read in 15748 rows of data, 2022-11-13 to 2022-11-13

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:52 Processing TG10-01087-2022-11-15T080000.data.txt

    Read in 30126 rows of data, 2022-11-15 to 2022-11-17

    Instrument serial number: TG10-01087

    Instrument time zone: EST

2023-02-21 08:39:53 Processing TG10-01286-2022-09-19T060000.data.txt

    Read in 9996 rows of data, 2022-09-19 to 2022-09-19

    Instrument serial number: TG10-01286

    Instrument time zone: US/Eastern

Code

dat <- do.call("rbind", dat)

Errors: 0

Total observations: 291,121

Read in metadata

Code

errors <- 0

# Function to read a metadata file given by 'fn', filename
# Returns NULL if there's an error reading
read_metadata <- function(fn) {
  basefn <- basename(fn)
  message(Sys.time(), " Processing ", basefn)
  
  metadat_raw <- try({
    read_excel(fn)
  })
  
  if(is.data.frame(metadat_raw)) {
    if(!all(METADATA_REQUIRED_FIELDS %in% names(metadat_raw))) {
      warning("Metadata file ", basefn, " doesn't have required fields!")
      return(NULL)
    }

    metadat_raw$Metadata_file <- basefn
    # Note that it's guaranteed that metadata time is instrument time,
    # so we don't worry about any time zones right now
    metadat <- metadat_raw
    
    # Metadata files can, optionally, have "Dead_band" and "Msmt_length" columns
    # If not present, insert those columns with NA values
    if(!"Dead_band" %in% names(metadat)) {
      metadat$Dead_band <- NA_real_
    } else {
      message("\tCustom Dead_band column exists")
    }
    if(!"Msmt_stop" %in% names(metadat)) {
      metadat$Msmt_stop <- NA_real_
    } else {
      message("\tCustom Msmt_stop column exists")
    }
    
  } else {
    warning("File read error for ", basefn)
    errors <<- errors + 1
    return(NULL)
  }

  metadat
}

metadat <- lapply(metadata_files, read_metadata)

2023-02-21 08:39:54 Processing COMPASS_ChamberFlux_Metadata_2022.xlsx

    Custom Dead_band column exists

    Custom Msmt_stop column exists

Code

metadat <- do.call("rbind", metadat)
if(!nrow(metadat)) stop("No metadata read!")

metadat$Obs <- seq_len(nrow(metadat))

Errors: 0

Total metadata rows: 119

Metadata-data matching

Code

# For each row of metadata, find corresponding observational data
zero_matches <- data.frame()
match_info <- data.frame()

matched_dat <- list()
for(i in seq_len(nrow(metadat))) {
  ts <- metadat$Time_start[i]
  start_time <- metadat$Date[i] + hour(ts) * 60 * 60 + minute(ts) * 60 + second(ts)
  end_time <- start_time + params$MSMT_LENGTH_SEC

  # Subset data following timestamp in metadata and store
  x <- subset(dat, TIMESTAMP >= start_time & TIMESTAMP < end_time)
  
#  message("Metadata row ", i, " ", metadat$Plot[i], " start = ", start_time, " matched ", nrow(x), " data rows")
  info <- data.frame(Row = i,
                     Plot = metadat$Plot[i],
                     start_time = start_time,
                     Rows_matched = nrow(x),
                     Min_timestamp = NA,
                     Max_timestamp = NA)
  if(nrow(x)) {
    # We have matched data!
    x$Obs <- i
    
    # Assign dead band: value given in Quarto params, but can be overridden in metadata    
    if(is.na(metadat$Dead_band[i])) {
      x$DEAD_BAND_SEC <- params$DEAD_BAND_SEC
    } else {
      x$DEAD_BAND_SEC <- metadat$Dead_band[i]
    }
    # Assign stop time (in seconds, not including dead band): value given in 
    # Quarto params, but can be overridden in metadata    
    if(is.na(metadat$Msmt_stop[i])) {
      x$MSMT_STOP_SEC <- params$MSMT_LENGTH_SEC
    } else {
      x$MSMT_STOP_SEC <- metadat$Msmt_stop[i]
    }
    
    # Calculate ELAPSED_SECS, which is relative time (in s) since start of measurement
    x$ELAPSED_SECS <- time_length(interval(min(x$TIMESTAMP), x$TIMESTAMP), "seconds")
    info$Min_timestamp <- min(x$TIMESTAMP)
    info$Max_timestamp <- max(x$TIMESTAMP)
  } else {
    # No data matched the date and time given in this row of the metadata
    message("\tWARNING; file: ", metadat$File[i])
    zero_matches <- rbind(zero_matches, data.frame(File = metadat$File[i],
                                                   Row = i,
                                                   Date = metadat$Date[i],
                                                   Time_start = metadat$Time_start[i]))
  }
  match_info <- rbind(match_info, info)
  matched_dat[[i]] <- x
}

# Combine all subsetted data together and merge with metadata 
combined_dat <- do.call("rbind", matched_dat)
combined_dat <- merge(metadat, combined_dat, by = "Obs")

# Compute a few basic stats about the combined data
n <- nrow(combined_dat)
neg_CO2 <- sum(combined_dat$CO2 < 0, na.rm = TRUE)
na_CO2 <- sum(is.na(combined_dat$CO2), na.rm = TRUE)
na_CH4 <- sum(is.na(combined_dat$CH4), na.rm = TRUE)

datatable(match_info)

Checking for metadata rows with no matching data…

Code

# Table of zero-match data, if any
if(nrow(zero_matches)) {
  knitr::kable(zero_matches)
}

Checking for unused data files…

Code

# Table of unused files, if any
if(length(unused_files)) {
  knitr::kable(data.frame(Data_file = unused_files))
}

Total rows of combined data: 35,705

CO2 and CH4 measurement data

Observations with negative CO2: 0 (0%)

Observations with missing CO2: 0 (0%)

Distribution of CO2 values:

Code

# Print distribution and (below) plot random subsample of CO2 data
OVERALL_PLOT_N <- 300
summary(combined_dat$CO2)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  409.0   455.7   471.5   489.7   505.3  1113.4

Plot of 300 random CO2 values (y axis 5%-95%):

Code

ylim_co2 <- quantile(combined_dat$CO2, probs = c(0.05, 0.95), na.rm = TRUE)

dat_small <- combined_dat[sample.int(n, OVERALL_PLOT_N, replace = TRUE),]
ggplot(dat_small, aes(TIMESTAMP, CO2, color = Plot)) + 
  geom_point(na.rm = TRUE) +
  coord_cartesian(ylim = ylim_co2)

Observations with missing CH4: 0 (0%)

Distribution of CH4 values:

Code

# Print distribution and (below) plot random subsample of CH4 data
summary(combined_dat$CH4)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1959    2049    2084    2108    2131    3283

Plot of 300 random CH4 values (y axis 5%-95%):

Code

ylim_ch4 <- quantile(combined_dat$CH4, probs = c(0.05, 0.95), na.rm = TRUE)

dat_small <- combined_dat[sample.int(n, OVERALL_PLOT_N, replace = TRUE),]
ggplot(dat_small, aes(TIMESTAMP, CH4, color = Plot)) + 
  geom_point(na.rm = TRUE) +
  coord_cartesian(ylim = ylim_ch4)

Observations by plot and day

Code

ggplot(combined_dat, aes(ELAPSED_SECS, CO2, group = Obs)) + 
  geom_point(na.rm = TRUE, color = "purple") +
  facet_wrap(~Plot, scales = "free") +
  # Plot both linear and curvilinear fits
  #geom_smooth(method = lm, formula = 'y ~ poly(x, 2)', na.rm = TRUE, linetype = 2, color = "blue") +
  geom_smooth(method = lm, formula = 'y ~ x', na.rm = TRUE, color = "black", linewidth = 0.5) +
  geom_vline(aes(xintercept = DEAD_BAND_SEC), linetype = 2, color = "green") +
  geom_vline(aes(xintercept = MSMT_STOP_SEC), linetype = 2, color = "red")

Code

ggplot(combined_dat, aes(ELAPSED_SECS, CH4, group = Obs)) + 
  geom_point(na.rm = TRUE, color = "blue") +
  facet_wrap(~Plot, scales = "free") +
  #geom_smooth(method = lm, formula = 'y ~ poly(x, 2)', na.rm = TRUE, linetype = 2, color = "blue") +
  geom_smooth(method = lm, formula = 'y ~ x', na.rm = TRUE, color = "black", linewidth = 0.5) +
  geom_vline(aes(xintercept = DEAD_BAND_SEC), linetype = 2, color = "green") +
  geom_vline(aes(xintercept = MSMT_STOP_SEC), linetype = 2, color = "red")

Fluxes

Model fitting

Code

model_fit_error <- FALSE
# Fit a model to units of gas per day
fit_model <- function(df, depvar) {
  # Metadata
  info <- data.frame(Obs = df$Obs[1],
                     TIMESTAMP = mean(df$TIMESTAMP),
                     DEAD_BAND_SEC = unique(df$DEAD_BAND_SEC),
                     MSMT_STOP_SEC = unique(df$MSMT_STOP_SEC),
                     TZ = df$TZ[1],
                     SN = df$SN[1],
                     Data_file = df$Data_file[1],
                     Gas = depvar,
                     Volume = df$Volume[1],
                     Temp = df$Temp[1],
                     Area = df$Area[1]
                     )
  # Remove the dead band data
  df <- df[df$ELAPSED_SECS > df$DEAD_BAND_SEC,]
  # Remove the after-end data
  df <- df[df$ELAPSED_SECS <= df$MSMT_STOP_SEC,]
  # Fit a linear model
  try(mod <- lm(df[,depvar] ~ df$ELAPSED_SECS))
  if(!exists("mod")) {
    message("ERROR fitting model for obs ", df$Obs[1], " ", df$Plot[1], " ", depvar)
    model_fit_error <- model_fit_error & FALSE
  }
  # Model statistics
  model_stats <- glance(mod)
  # Slope and intercept statistics
  slope_stats <- tidy(mod)[2,-1]
  names(slope_stats) <- paste("slope", names(slope_stats), sep = "_")
  intercept_stats <- tidy(mod)[1,-1]
  names(intercept_stats) <- paste("int", names(intercept_stats), sep = "_")
  # Add robust regression slope as a QA/QC check
  robust <- MASS::rlm(df[,depvar] ~ df$ELAPSED_SECS)
  slope_stats$slope_estimate_robust <- coefficients(robust)[2]
  # Add polynomial regression R2 as a QA/QC check
  poly <- lm(df[,depvar] ~ poly(df$ELAPSED_SECS, 3))
  slope_stats$r.squared_poly <- summary(poly)$r.squared
  
  res <- cbind(info, model_stats, slope_stats, intercept_stats)
  
  # Round to a sensible number of digits and return
  numerics <- sapply(res, is.numeric)
  res[numerics] <- round(res[numerics], 3)
  res
}
results_ch4 <- lapply(split(combined_dat, combined_dat$Obs), fit_model, "CH4")
results_ch4 <- do.call("rbind", results_ch4)
results_co2 <- lapply(split(combined_dat, combined_dat$Obs), fit_model, "CO2")
results_co2 <- do.call("rbind", results_co2)

Unit conversion

Code

required_fields <- c("Volume", "Temp")
if(!all(required_fields %in% names(combined_dat))) {
  stop("We need volume ('Volume') and temperature ('Temp')")  
}

# The slopes and their errors, from model-fitting above, are in ppb/s (CH4) 
# and ppm/s (CO2). Convert them to µmol/m2/day and mmol/m2/day respectively.

# CH4
# Use known volume of chamber to convert ppb/s to liters/s:
#   ppb CH4 * volume of chamber (m3) * 1000 L per m3
CH4_slope_L <- with(results_ch4, (slope_estimate / 1e9) * Volume * 1000)
CH4_slope_err_L <- with(results_ch4, (slope_std.error / 1e9) * Volume * 1000)

# Use ideal gas law to calculate µmol of CH4 per m2 ground:
# (atm pressure * L CH4) / (R [in L*atm / T[K] * mol] * T[K]) * 10^6 µmol/mol
TEMP_K <- with(results_ch4, Temp + 273.15)
AREA <- results_ch4$Area
S_PER_DAY <- 60 * 60 * 24
results_ch4$flux_estimate <- (1 * CH4_slope_L) / (0.08206 * TEMP_K) * 1e6 / AREA * S_PER_DAY
results_ch4$flux_units <- "µmol/m2/day"
results_ch4$flux_std.error <- (1 * CH4_slope_err_L) / (0.08206 * TEMP_K) * 1e6 / AREA * S_PER_DAY

# CO2
# Use known volume of chamber to convert ppm/s to liters/s:
#   ppm CO2 * volume of chamber (m3) * 1000 L per m3
CO2_slope_L <- with(results_co2, (slope_estimate / 1e9) * Volume * 1000)
CO2_slope_err_L <- with(results_co2, (slope_std.error / 1e9) * Volume * 1000)
# Use ideal gas law to calculate mmol of CO2 per m2 ground:
# (atm pressure * L CO2) / (R [in L*atm / T[K] * mol] * T[K]) * 10^3 mmol/mol
#combined_dat$`CO2 mmol/m2` <- (1 * CO2_L) / (0.08206 * TEMP_K) * 1e3 / AREA
TEMP_K <- with(results_co2, Temp + 273.15)
AREA <- results_co2$Area
results_co2$flux_estimate <- (1 * CO2_slope_L) / (0.08206 * TEMP_K) * 1e6 / AREA * S_PER_DAY
results_co2$flux_units <- "mmol/m2/day"
results_co2$flux_std.error <- (1 * CO2_slope_err_L) / (0.08206 * TEMP_K) * 1e6 / AREA * S_PER_DAY

# Combine CH4 and CO2 results and re-merge with metadata
results <- rbind(results_co2, results_ch4)
# Drop columns we don't need because they'll come in with the merge
results$Area <- results$Temp <- results$Volume <- NULL

# Re-merge with metadata
results <- as_tibble(merge(results, metadat, by = "Obs"))

Total rows of results: 238

Flux summary

Code

# Flux table and visualization

formatRound(datatable(results),
            which(sapply(results, is.numeric)), digits = 3)

Code

ggplot(results, aes(x = TIMESTAMP, y = flux_estimate, color = Plot)) + 
  geom_point() + 
  geom_errorbar(aes(ymin = flux_estimate - flux_std.error,
                    ymax = flux_estimate + flux_std.error)) +
  geom_line(aes(group = Plot)) +
  facet_wrap(~paste(Gas, flux_units) + Site, scales = "free")

QA/QC

R2 distribution

Code

results$FLAG_R2 <- onecol_plot_and_flag(results,
                                        "adj.r.squared",
                                        right = FALSE)

Code

plot_group(subset(results, FLAG_R2), combined_dat)

y-intercept distribution

Code

# The intercept is quite variable, so use {0.01, 0.99}
results$FLAG_INTERCEPT <- onecol_plot_and_flag(results, "int_estimate", 
                                               probs = c(0.01, 0.99))

Code

plot_group(subset(results, FLAG_INTERCEPT), combined_dat)

Robust regression divergence

This can indicate outlier problems or ebullition events.

Code

results$FLAG_ROBUST_DIV <- twocol_plot_and_flag(results,
                                                "slope_estimate", 
                                                "slope_estimate_robust",
                                                add_1to1_line = TRUE)

Code

plot_group(subset(results, FLAG_ROBUST_DIV), combined_dat)

Polynomial regression divergence

This can indicate curvature of the gas concentrations time series due to saturation, etc.

Code

results$FLAG_POLY_DIV <- twocol_plot_and_flag(results,
                                                "adj.r.squared", 
                                                "r.squared_poly",
                                                add_1to1_line = TRUE)

Code

plot_group(subset(results, FLAG_POLY_DIV), combined_dat)

QA/QC summary

Code

res1 <- results["Obs"]
res2 <- results[grepl("^FLAG_", names(results))]
res <- pivot_longer(cbind(res1, res2), starts_with("FLAG"), names_to = "Flag")
res <- subset(res, value)
f <- function(x) paste(unique(x), collapse = ", ")
knitr::kable(aggregate(Obs ~ Flag, data = res, f))

Flag	Obs
FLAG_INTERCEPT	1, 3, 15, 18, 28, 55, 76
FLAG_POLY_DIV	10, 34, 88, 104
FLAG_R2	5, 10, 88, 93, 104, 107, 110, 113
FLAG_ROBUST_DIV	2, 10

Output

Data

Code

now_string <- function() format(Sys.time(), "%Y-%m-%d")
mkfn <- function(name, extension) { # "make filename" helper function
  file.path(params$output_folder,
            paste(paste(name, now_string(), sep = "_"), extension, sep = "."))
}

flux_fn <- mkfn("results", "csv")
message("Writing final flux dataset ", flux_fn, "...")

Writing final flux dataset output//results_2023-02-21.csv...

Code

write_csv(results, flux_fn)
obs_fn <- mkfn("observations", "csv")
message("Writing compiled observations dataset ", obs_fn, "...")

Writing compiled observations dataset output//observations_2023-02-21.csv...

Code

write_csv(combined_dat, obs_fn)

if(params$SAVE_RESULTS_FIGURES) {
  message("Writing plot outputs...", appendLF = FALSE)
  # Loop through each line, generating plot and filename and saving
  for(i in seq_len(nrow(results))) {
    p <- plot_group(results[i,], combined_dat)
    fn <- mkfn(paste("obs", results$Obs[i], results$Date[i],
                     results$Plot[i], sep = "_"), "pdf")
    ggsave(fn, plot = p, width = 8, height = 5)
  }
  message("\t", i, " written")
}

Writing plot outputs...

    238 written

Metadata

Note this doesn’t include any ‘extra’ fields in the metadata file.

Code

results_fields <- c(
  "adj.r.squared" = "Linear flux model adjusted R2",
  "AIC" = "Linear flux model AIC",
  "Area" = "Area entry from metadata file, m2",
  "BIC" = "Linear flux model BIC",
  "Data_file" = "Data file name",
  "Date" = "Date entry from metadata file",
  "DEAD_BAND_SEC" = "Dead band used for flux calculation, s",
  "Dead_band" = "Dead band entry from metadata file, if present, s",
  "deviance" = "Linear flux model deviance",
  "df.residual" = "Linear flux model degrees of freedom",
  "df" = "Linear flux model degrees of freedom",
  "FLAG_INTERCEPT" = "Flag: linear model intercept flagged as unusual",
  "FLAG_POLY_DIV" = "Flag: linear model R2 diverges from polynomial model",
  "FLAG_R2" = "Flag: linear model R2 flagged as unusual",
  "FLAG_ROBUST_DIV" = "Flag: linear model flux diverges from robust model",
  "flux_estimate" = "Gas flux computed from slope of linear flux model",
  "flux_std.error" = "Standard error of flux_estimate",
  "flux_units" = "Units of flux_estimate and flux_std.error",
  "Gas" = "Gas (CO2 or CH4)",
  "int_estimate" = "Intercept of linear flux model, ppb (CH4) or ppm (CO2)",
  "int_p.value" = "P-value of intercept of linear flux model",
  "int_statistic" = "Statistic (t-value) of intercept of linear flux model",
  "int_std.error" = "Standard error of intercept of linear flux model",
  "logLik" = "Linear flux model log-likelihood",
  "Metadata_file" = "Metadata file name",
  "MSMT_STOP_SEC" = "Measurement stop used for flux calculation, s",
  "Msmt_stop" = "Measurement stop entry from metadata file, if present, s",
  "nobs" = "Linear flux model number of observations",
  "Obs" = "Observation number, i.e. order in metadata file(s)",
  "p.value" = "Linear flux model p-value",
  "Plot" = "Plot entry from metadata file",
  "r.squared_poly" = "Polynomial flux model R2",
  "r.squared" = "Linear flux model R2",
  "sigma" = "Linear flux model sigma",
  "slope_estimate_robust" = "Slope of robust flux model, ppb (CH4) or ppm (CO2) /s",
  "slope_estimate" = "Slope of linear flux model, ppb (CH4) or ppm (CO2) /s",
  "slope_p.value" = "P-value of slope of linear flux model",
  "slope_statistic" = "Statistic (t-value) of slope of linear flux model",
  "slope_std.error" = "Standard error of slope of linear flux model",
  "SN" = "Serial number from analyzer",
  "statistic" = "Linear flux model F-statistic",
  "Temp" = "Temperature entry from metadata file, degC",
  "Time_start" = "Time_start entry from metadata file",
  "TIMESTAMP" = "Original timestamp from analyzer",
  "TZ" = "Time zone from analyzer",
  "Volume" = "Volume entry from metadata file, m3"
)

# Which metadata names above don't occur in results?
not_there <- names(results_fields)[!names(results_fields) %in% names(results)]
if(length(not_there)) {
  message("Field metadata descriptions NOT in results: ", paste(not_there, collapse = ", "))
}

results_meta <- data.frame(Field_name = names(results))
results_meta$Description <- results_fields[results_meta$Field_name]
# Mark any metadata fields that aren't required - they're extra, from user
from_metadata <- is.na(results_meta$Description) &
  results_meta$Field_name %in% names(metadat)
results_meta$Description[from_metadata] <- "<from metadata file>"
datatable(results_meta)

Code

metadata_fn <- mkfn("results_metadata", "csv")
message("Writing metadata ", metadata_fn, "...")

Writing metadata output//results_metadata_2023-02-21.csv...

Code

write_csv(results_meta, metadata_fn)

All done!

Reproducibility

Code

sessionInfo()

R version 4.1.3 (2022-03-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur/Monterey 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] tidyr_1.3.0     MASS_7.3-55     DT_0.23         broom_0.8.0    
[5] readxl_1.4.2    ggrepel_0.9.1   ggplot2_3.4.1   lubridate_1.8.0
[9] readr_2.1.2    

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.0  xfun_0.37         bslib_0.4.2       purrr_1.0.1      
 [5] splines_4.1.3     lattice_0.20-45   colorspace_2.1-0  vctrs_0.5.2      
 [9] generics_0.1.3    htmltools_0.5.4   yaml_2.3.7        mgcv_1.8-39      
[13] utf8_1.2.3        rlang_1.0.6       pillar_1.8.1      jquerylib_0.1.4  
[17] glue_1.6.2        withr_2.5.0       bit64_4.0.5       lifecycle_1.0.3  
[21] munsell_0.5.0     gtable_0.3.1      cellranger_1.1.0  ragg_1.2.2       
[25] htmlwidgets_1.5.4 evaluate_0.20     labeling_0.4.2    knitr_1.42       
[29] tzdb_0.3.0        fastmap_1.1.0     crosstalk_1.2.0   parallel_4.1.3   
[33] fansi_1.0.4       Rcpp_1.0.10       scales_1.2.1      backports_1.4.1  
[37] cachem_1.0.6      vroom_1.5.7       jsonlite_1.8.4    systemfonts_1.0.4
[41] bit_4.0.4         farver_2.1.1      textshaping_0.3.6 hms_1.1.2        
[45] digest_0.6.31     dplyr_1.1.0       grid_4.1.3        cli_3.6.0        
[49] tools_4.1.3       magrittr_2.0.3    sass_0.4.5        tibble_3.1.8     
[53] crayon_1.5.2      pkgconfig_2.0.3   Matrix_1.4-0      ellipsis_0.3.2   
[57] rmarkdown_2.20    rstudioapi_0.14   R6_2.5.1          nlme_3.1-155     
[61] compiler_4.1.3