Writing output to /Users/d3x290/Desktop/Code/sw/output.
HTML outfile is ghg-processor.html.
Working directory is /Users/d3x290/Desktop/Code/sw.
Data
Read in GHG data
Code
errors <-0# error count# Function to read a data file given by 'fn', filename# Returns NULL if there's an error readingread_ghg_data <-function(fn) { basefn <-basename(fn)message(Sys.time(), " Processing ", basefn) dat_raw <-readLines(fn)# Save the serial number in case we want to look at machine differences later sn <-trimws(gsub("SN:\t", "", dat_raw[2], fixed =TRUE))# Parse the timezone from the header and use it to make a TIMESTAMP field tz <-trimws(gsub("Timezone:\t", "", dat_raw[5], fixed =TRUE))# These files have five header lines, then the names of the columns in line 6,# and then the column units in line 7. We only want the names dat_raw <- dat_raw[-c(1:5, 7)]# Irritatingly, the units line can repeat in the file (???!?). Remove these dat_raw <- dat_raw[grep("DATAU", dat_raw, invert =TRUE)]# Double irritatingly, if there's no remark, the software write \t\t, not# \t""\t, causing a read error. Replace these instances dat_raw <-gsub("\t\t", "\tnan\t", dat_raw, fixed =TRUE) dat <-try({ readr::read_table(I(dat_raw), na ="nan", guess_max =1e4) })# If the try() above succeeded, we haev a data frame and can process it if(is.data.frame(dat)) {message("\tRead in ", nrow(dat), " rows of data, ", min(dat$DATE), " to ", max(dat$DATE))message("\tInstrument serial number: ", sn) dat$SN <- snmessage("\tInstrument time zone: ", tz)# We record the time zone but don't convert the timestamps to it as# that's not needed right now; metadata time is guaranteed to be same dat$TIMESTAMP <- lubridate::ymd_hms(paste(dat$DATE, dat$TIME)) dat$TZ <- tz# Remove unneeded Licor DATE and TIME columns dat$DATE <- dat$TIME <-NULL dat$Data_file <- basefnreturn(dat) } else {warning("File read error for ", basefn) errors <<- errors +1return(NULL) }}dat <-lapply(data_files, read_ghg_data)
Read in 9996 rows of data, 2022-09-19 to 2022-09-19
Instrument serial number: TG10-01286
Instrument time zone: US/Eastern
Code
dat <-do.call("rbind", dat)
Errors: 0
Total observations: 291,121
Read in metadata
Code
errors <-0# Function to read a metadata file given by 'fn', filename# Returns NULL if there's an error readingread_metadata <-function(fn) { basefn <-basename(fn)message(Sys.time(), " Processing ", basefn) metadat_raw <-try({read_excel(fn) })if(is.data.frame(metadat_raw)) {if(!all(METADATA_REQUIRED_FIELDS %in%names(metadat_raw))) {warning("Metadata file ", basefn, " doesn't have required fields!")return(NULL) } metadat_raw$Metadata_file <- basefn# Note that it's guaranteed that metadata time is instrument time,# so we don't worry about any time zones right now metadat <- metadat_raw# Metadata files can, optionally, have "Dead_band" and "Msmt_length" columns# If not present, insert those columns with NA valuesif(!"Dead_band"%in%names(metadat)) { metadat$Dead_band <-NA_real_ } else {message("\tCustom Dead_band column exists") }if(!"Msmt_stop"%in%names(metadat)) { metadat$Msmt_stop <-NA_real_ } else {message("\tCustom Msmt_stop column exists") } } else {warning("File read error for ", basefn) errors <<- errors +1return(NULL) } metadat}metadat <-lapply(metadata_files, read_metadata)
# For each row of metadata, find corresponding observational datazero_matches <-data.frame()match_info <-data.frame()matched_dat <-list()for(i inseq_len(nrow(metadat))) { ts <- metadat$Time_start[i] start_time <- metadat$Date[i] +hour(ts) *60*60+minute(ts) *60+second(ts) end_time <- start_time + params$MSMT_LENGTH_SEC# Subset data following timestamp in metadata and store x <-subset(dat, TIMESTAMP >= start_time & TIMESTAMP < end_time)# message("Metadata row ", i, " ", metadat$Plot[i], " start = ", start_time, " matched ", nrow(x), " data rows") info <-data.frame(Row = i,Plot = metadat$Plot[i],start_time = start_time,Rows_matched =nrow(x),Min_timestamp =NA,Max_timestamp =NA)if(nrow(x)) {# We have matched data! x$Obs <- i# Assign dead band: value given in Quarto params, but can be overridden in metadata if(is.na(metadat$Dead_band[i])) { x$DEAD_BAND_SEC <- params$DEAD_BAND_SEC } else { x$DEAD_BAND_SEC <- metadat$Dead_band[i] }# Assign stop time (in seconds, not including dead band): value given in # Quarto params, but can be overridden in metadata if(is.na(metadat$Msmt_stop[i])) { x$MSMT_STOP_SEC <- params$MSMT_LENGTH_SEC } else { x$MSMT_STOP_SEC <- metadat$Msmt_stop[i] }# Calculate ELAPSED_SECS, which is relative time (in s) since start of measurement x$ELAPSED_SECS <-time_length(interval(min(x$TIMESTAMP), x$TIMESTAMP), "seconds") info$Min_timestamp <-min(x$TIMESTAMP) info$Max_timestamp <-max(x$TIMESTAMP) } else {# No data matched the date and time given in this row of the metadatamessage("\tWARNING; file: ", metadat$File[i]) zero_matches <-rbind(zero_matches, data.frame(File = metadat$File[i],Row = i,Date = metadat$Date[i],Time_start = metadat$Time_start[i])) } match_info <-rbind(match_info, info) matched_dat[[i]] <- x}# Combine all subsetted data together and merge with metadata combined_dat <-do.call("rbind", matched_dat)combined_dat <-merge(metadat, combined_dat, by ="Obs")# Compute a few basic stats about the combined datan <-nrow(combined_dat)neg_CO2 <-sum(combined_dat$CO2 <0, na.rm =TRUE)na_CO2 <-sum(is.na(combined_dat$CO2), na.rm =TRUE)na_CH4 <-sum(is.na(combined_dat$CH4), na.rm =TRUE)datatable(match_info)
Checking for metadata rows with no matching data…
Code
# Table of zero-match data, if anyif(nrow(zero_matches)) { knitr::kable(zero_matches)}
Checking for unused data files…
Code
# Table of unused files, if anyif(length(unused_files)) { knitr::kable(data.frame(Data_file = unused_files))}
Total rows of combined data: 35,705
CO2 and CH4 measurement data
Observations with negative CO2: 0 (0%)
Observations with missing CO2: 0 (0%)
Distribution of CO2 values:
Code
# Print distribution and (below) plot random subsample of CO2 dataOVERALL_PLOT_N <-300summary(combined_dat$CO2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
409.0 455.7 471.5 489.7 505.3 1113.4
ggplot(combined_dat, aes(ELAPSED_SECS, CO2, group = Obs)) +geom_point(na.rm =TRUE, color ="purple") +facet_wrap(~Plot, scales ="free") +# Plot both linear and curvilinear fits#geom_smooth(method = lm, formula = 'y ~ poly(x, 2)', na.rm = TRUE, linetype = 2, color = "blue") +geom_smooth(method = lm, formula ='y ~ x', na.rm =TRUE, color ="black", linewidth =0.5) +geom_vline(aes(xintercept = DEAD_BAND_SEC), linetype =2, color ="green") +geom_vline(aes(xintercept = MSMT_STOP_SEC), linetype =2, color ="red")
Code
ggplot(combined_dat, aes(ELAPSED_SECS, CH4, group = Obs)) +geom_point(na.rm =TRUE, color ="blue") +facet_wrap(~Plot, scales ="free") +#geom_smooth(method = lm, formula = 'y ~ poly(x, 2)', na.rm = TRUE, linetype = 2, color = "blue") +geom_smooth(method = lm, formula ='y ~ x', na.rm =TRUE, color ="black", linewidth =0.5) +geom_vline(aes(xintercept = DEAD_BAND_SEC), linetype =2, color ="green") +geom_vline(aes(xintercept = MSMT_STOP_SEC), linetype =2, color ="red")
Fluxes
Model fitting
Code
model_fit_error <-FALSE# Fit a model to units of gas per dayfit_model <-function(df, depvar) {# Metadata info <-data.frame(Obs = df$Obs[1],TIMESTAMP =mean(df$TIMESTAMP),DEAD_BAND_SEC =unique(df$DEAD_BAND_SEC),MSMT_STOP_SEC =unique(df$MSMT_STOP_SEC),TZ = df$TZ[1],SN = df$SN[1],Data_file = df$Data_file[1],Gas = depvar,Volume = df$Volume[1],Temp = df$Temp[1],Area = df$Area[1] )# Remove the dead band data df <- df[df$ELAPSED_SECS > df$DEAD_BAND_SEC,]# Remove the after-end data df <- df[df$ELAPSED_SECS <= df$MSMT_STOP_SEC,]# Fit a linear modeltry(mod <-lm(df[,depvar] ~ df$ELAPSED_SECS))if(!exists("mod")) {message("ERROR fitting model for obs ", df$Obs[1], " ", df$Plot[1], " ", depvar) model_fit_error <- model_fit_error &FALSE }# Model statistics model_stats <-glance(mod)# Slope and intercept statistics slope_stats <-tidy(mod)[2,-1]names(slope_stats) <-paste("slope", names(slope_stats), sep ="_") intercept_stats <-tidy(mod)[1,-1]names(intercept_stats) <-paste("int", names(intercept_stats), sep ="_")# Add robust regression slope as a QA/QC check robust <- MASS::rlm(df[,depvar] ~ df$ELAPSED_SECS) slope_stats$slope_estimate_robust <-coefficients(robust)[2]# Add polynomial regression R2 as a QA/QC check poly <-lm(df[,depvar] ~poly(df$ELAPSED_SECS, 3)) slope_stats$r.squared_poly <-summary(poly)$r.squared res <-cbind(info, model_stats, slope_stats, intercept_stats)# Round to a sensible number of digits and return numerics <-sapply(res, is.numeric) res[numerics] <-round(res[numerics], 3) res}results_ch4 <-lapply(split(combined_dat, combined_dat$Obs), fit_model, "CH4")results_ch4 <-do.call("rbind", results_ch4)results_co2 <-lapply(split(combined_dat, combined_dat$Obs), fit_model, "CO2")results_co2 <-do.call("rbind", results_co2)
Unit conversion
Code
required_fields <-c("Volume", "Temp")if(!all(required_fields %in%names(combined_dat))) {stop("We need volume ('Volume') and temperature ('Temp')") }# The slopes and their errors, from model-fitting above, are in ppb/s (CH4) # and ppm/s (CO2). Convert them to µmol/m2/day and mmol/m2/day respectively.# CH4# Use known volume of chamber to convert ppb/s to liters/s:# ppb CH4 * volume of chamber (m3) * 1000 L per m3CH4_slope_L <-with(results_ch4, (slope_estimate /1e9) * Volume *1000)CH4_slope_err_L <-with(results_ch4, (slope_std.error /1e9) * Volume *1000)# Use ideal gas law to calculate µmol of CH4 per m2 ground:# (atm pressure * L CH4) / (R [in L*atm / T[K] * mol] * T[K]) * 10^6 µmol/molTEMP_K <-with(results_ch4, Temp +273.15)AREA <- results_ch4$AreaS_PER_DAY <-60*60*24results_ch4$flux_estimate <- (1* CH4_slope_L) / (0.08206* TEMP_K) *1e6/ AREA * S_PER_DAYresults_ch4$flux_units <-"µmol/m2/day"results_ch4$flux_std.error <- (1* CH4_slope_err_L) / (0.08206* TEMP_K) *1e6/ AREA * S_PER_DAY# CO2# Use known volume of chamber to convert ppm/s to liters/s:# ppm CO2 * volume of chamber (m3) * 1000 L per m3CO2_slope_L <-with(results_co2, (slope_estimate /1e9) * Volume *1000)CO2_slope_err_L <-with(results_co2, (slope_std.error /1e9) * Volume *1000)# Use ideal gas law to calculate mmol of CO2 per m2 ground:# (atm pressure * L CO2) / (R [in L*atm / T[K] * mol] * T[K]) * 10^3 mmol/mol#combined_dat$`CO2 mmol/m2` <- (1 * CO2_L) / (0.08206 * TEMP_K) * 1e3 / AREATEMP_K <-with(results_co2, Temp +273.15)AREA <- results_co2$Arearesults_co2$flux_estimate <- (1* CO2_slope_L) / (0.08206* TEMP_K) *1e6/ AREA * S_PER_DAYresults_co2$flux_units <-"mmol/m2/day"results_co2$flux_std.error <- (1* CO2_slope_err_L) / (0.08206* TEMP_K) *1e6/ AREA * S_PER_DAY# Combine CH4 and CO2 results and re-merge with metadataresults <-rbind(results_co2, results_ch4)# Drop columns we don't need because they'll come in with the mergeresults$Area <- results$Temp <- results$Volume <-NULL# Re-merge with metadataresults <-as_tibble(merge(results, metadat, by ="Obs"))
Total rows of results: 238
Flux summary
Code
# Flux table and visualizationformatRound(datatable(results),which(sapply(results, is.numeric)), digits =3)
write_csv(combined_dat, obs_fn)if(params$SAVE_RESULTS_FIGURES) {message("Writing plot outputs...", appendLF =FALSE)# Loop through each line, generating plot and filename and savingfor(i inseq_len(nrow(results))) { p <-plot_group(results[i,], combined_dat) fn <-mkfn(paste("obs", results$Obs[i], results$Date[i], results$Plot[i], sep ="_"), "pdf")ggsave(fn, plot = p, width =8, height =5) }message("\t", i, " written")}
Writing plot outputs...
238 written
Metadata
Note this doesn’t include any ‘extra’ fields in the metadata file.
Code
results_fields <-c("adj.r.squared"="Linear flux model adjusted R2","AIC"="Linear flux model AIC","Area"="Area entry from metadata file, m2","BIC"="Linear flux model BIC","Data_file"="Data file name","Date"="Date entry from metadata file","DEAD_BAND_SEC"="Dead band used for flux calculation, s","Dead_band"="Dead band entry from metadata file, if present, s","deviance"="Linear flux model deviance","df.residual"="Linear flux model degrees of freedom","df"="Linear flux model degrees of freedom","FLAG_INTERCEPT"="Flag: linear model intercept flagged as unusual","FLAG_POLY_DIV"="Flag: linear model R2 diverges from polynomial model","FLAG_R2"="Flag: linear model R2 flagged as unusual","FLAG_ROBUST_DIV"="Flag: linear model flux diverges from robust model","flux_estimate"="Gas flux computed from slope of linear flux model","flux_std.error"="Standard error of flux_estimate","flux_units"="Units of flux_estimate and flux_std.error","Gas"="Gas (CO2 or CH4)","int_estimate"="Intercept of linear flux model, ppb (CH4) or ppm (CO2)","int_p.value"="P-value of intercept of linear flux model","int_statistic"="Statistic (t-value) of intercept of linear flux model","int_std.error"="Standard error of intercept of linear flux model","logLik"="Linear flux model log-likelihood","Metadata_file"="Metadata file name","MSMT_STOP_SEC"="Measurement stop used for flux calculation, s","Msmt_stop"="Measurement stop entry from metadata file, if present, s","nobs"="Linear flux model number of observations","Obs"="Observation number, i.e. order in metadata file(s)","p.value"="Linear flux model p-value","Plot"="Plot entry from metadata file","r.squared_poly"="Polynomial flux model R2","r.squared"="Linear flux model R2","sigma"="Linear flux model sigma","slope_estimate_robust"="Slope of robust flux model, ppb (CH4) or ppm (CO2) /s","slope_estimate"="Slope of linear flux model, ppb (CH4) or ppm (CO2) /s","slope_p.value"="P-value of slope of linear flux model","slope_statistic"="Statistic (t-value) of slope of linear flux model","slope_std.error"="Standard error of slope of linear flux model","SN"="Serial number from analyzer","statistic"="Linear flux model F-statistic","Temp"="Temperature entry from metadata file, degC","Time_start"="Time_start entry from metadata file","TIMESTAMP"="Original timestamp from analyzer","TZ"="Time zone from analyzer","Volume"="Volume entry from metadata file, m3")# Which metadata names above don't occur in results?not_there <-names(results_fields)[!names(results_fields) %in%names(results)]if(length(not_there)) {message("Field metadata descriptions NOT in results: ", paste(not_there, collapse =", "))}results_meta <-data.frame(Field_name =names(results))results_meta$Description <- results_fields[results_meta$Field_name]# Mark any metadata fields that aren't required - they're extra, from userfrom_metadata <-is.na(results_meta$Description) & results_meta$Field_name %in%names(metadat)results_meta$Description[from_metadata] <-"<from metadata file>"datatable(results_meta)