Render this report
rmarkdown::render("combine_farm_inputs.Rmd")
# Override directories at render time:
rmarkdown::render("combine_farm_inputs.Rmd",
params = list(data_dir="path/to/DATA", output_dir="path/to/OUTPUT"))Place all farm .xlsx files in
DATA/. Master Excel is written to
OUTPUT/SAS_4842_Master_Farm_Inputs.xlsx.
All data loading, parsing, combining, and Excel export runs in the
single chunk below. Output objects (master_fuel,
master_yield, master_fert,
master_flags, structure_audit,
parse_summary, FILE_REGISTRY) are available to
every display chunk that follows.
# =============================================================================
# ── 1. SETTINGS ─────────────────────────────────────────────────────────────
# =============================================================================
DATA_DIR <- params$data_dir
OUTPUT_DIR <- params$output_dir
# Farm keyword matching table.
# Add a row here to onboard an additional farm — no other changes needed.
FARM_PATTERNS <- data.frame(
farm_id = c("Dyt Farms","Hettinga Farms","Mineral King Sawtooth",
"Vander Dussen","DCB Farming","Eric Shannon Farming",
"B&R TeVelde","Ron Colburn"),
pai_id = 1:8,
keywords = c("dyt","hettinga","sawtooth|mk_saw|mksawtooth","vander",
"dcb","shannon|eric_shannon","tevelde|b_r_te","colburn|ron_col"),
stringsAsFactors = FALSE
)
# =============================================================================
# ── 2. FILE DISCOVERY & MATCHING ────────────────────────────────────────────
# =============================================================================
# ── Working-directory note ─────────────────────────────────────────────────
# When rendered via rmarkdown::render(), R's working directory is set to the
# folder containing the .Rmd file. DATA_DIR = "DATA" therefore resolves to
# a DATA/ subfolder *next to* the .Rmd — not the RStudio project root or the
# shell directory you ran the render command from.
# If your files are elsewhere, pass the full absolute path:
# rmarkdown::render("...", params = list(data_dir = "/full/path/to/DATA"))
wd_at_render <- getwd()
if (!dir.exists(DATA_DIR)) {
msg <- paste0(
"DATA directory not found.\n\n",
" Looking for : ", normalizePath(DATA_DIR, mustWork=FALSE), "\n",
" Working dir : ", wd_at_render, "\n\n",
"Fix options:\n",
" 1. Create a DATA/ folder next to the .Rmd and put your .xlsx files in it.\n",
" 2. Override at render time:\n",
" rmarkdown::render(\"your.Rmd\",\n",
" params = list(data_dir = \"/absolute/path/to/DATA\"))\n"
)
stop(msg, call.=FALSE)
}
dir.create(OUTPUT_DIR, showWarnings=FALSE, recursive=TRUE)
# List ALL files in DATA/ (any extension) so the error message is informative
all_files <- list.files(DATA_DIR, full.names=TRUE, recursive=FALSE)
all_xlsx <- all_files[grepl("\\.xlsx$", all_files, ignore.case=TRUE)]
if (length(all_xlsx) == 0) {
all_names <- if (length(all_files) > 0)
paste0(" - ", basename(all_files), collapse="\n")
else
" (folder is empty)"
msg <- paste0(
"No .xlsx files found in DATA/.\n\n",
" DATA dir resolved to : ", normalizePath(DATA_DIR), "\n",
" Working dir : ", wd_at_render, "\n",
" Files present :\n", all_names, "\n\n",
"Fix options:\n",
" 1. Move your .xlsx files into: ", normalizePath(DATA_DIR), "\n",
" 2. Pass a different directory:\n",
" rmarkdown::render(\"your.Rmd\",\n",
" params = list(data_dir = \"/absolute/path/to/DATA\"))\n"
)
stop(msg, call.=FALSE)
}
match_farm <- function(filepath) {
base <- tolower(basename(filepath))
for (i in seq_len(nrow(FARM_PATTERNS))) {
if (grepl(FARM_PATTERNS$keywords[i], base, perl=TRUE))
return(data.frame(farm_id = FARM_PATTERNS$farm_id[i],
pai_id = FARM_PATTERNS$pai_id[i],
filename = basename(filepath),
filepath = filepath,
stringsAsFactors = FALSE))
}
NULL
}
matched_list <- Filter(Negate(is.null), lapply(all_xlsx, match_farm))
if (length(matched_list) == 0)
stop("No files matched any farm pattern. Check FARM_PATTERNS.", call.=FALSE)
FILE_REGISTRY <- bind_rows(matched_list) |> arrange(pai_id)
unmatched <- setdiff(basename(all_xlsx), sapply(matched_list, `[[`, "filename"))
# =============================================================================
# ── 3. HELPER FUNCTIONS ─────────────────────────────────────────────────────
# =============================================================================
safe_read <- function(path, sheet, ...) {
tryCatch(suppressMessages(readxl::read_excel(path, sheet=sheet, ...)),
error=function(e) NULL)
}
clean_cols <- function(df) {
if (is.null(df)) return(NULL)
names(df) <- trimws(as.character(names(df)))
df
}
tag <- function(df, farm_id, pai_id, source_file, source_sheet) {
df |> mutate(farm_id=farm_id, pai_id=as.integer(pai_id),
source_file=source_file, source_sheet=source_sheet, .before=1)
}
year_cols_of <- function(df) {
nms <- names(df)
nms[vapply(nms, function(x) {
v <- suppressWarnings(as.integer(x))
!is.na(v) && v >= 2010L && v <= 2035L
}, logical(1))]
}
rename_first <- function(df, pattern, new_name) {
idx <- which(grepl(pattern, names(df), ignore.case=TRUE, perl=TRUE))[1]
if (!is.na(idx) && !(new_name %in% names(df))) names(df)[idx] <- new_name
df
}
classify_fuel <- function(fuel_name) {
fn <- toupper(trimws(as.character(fuel_name)))
dplyr::case_when(
grepl("RENEW|NESTE|HVO|R99", fn) ~ "Renewable Diesel",
grepl("UNLEAD|GASOLINE|\\b87\\b|\\b91\\b", fn) ~ "Gasoline",
grepl("LPG|PROPANE", fn) ~ "LPG",
TRUE ~ "Conventional Diesel"
)
}
tier_letter <- function(q) {
q <- as.character(q)
dplyr::case_when(startsWith(q,"A")~"A", startsWith(q,"B")~"B",
startsWith(q,"C")~"C", startsWith(q,"D")~"D",
startsWith(q,"E")~"E", startsWith(q,"F")~"F", TRUE~"C")
}
tier_u_ad <- function(t) unname(c(A=5,B=10,C=15,D=25,E=50,F=75)[t])
coalesce_first <- function(...) {
for (x in list(...)) if (!is.null(x)) return(x)
NULL
}
# Accumulators filled by parse_farm()
fuel_list <- list()
yield_list <- list()
fert_list <- list()
audit_log <- list()
# =============================================================================
# ── 4. PER-FARM PARSER ──────────────────────────────────────────────────────
# =============================================================================
parse_farm <- function(fp, fn, farm, pai) {
sheets <- tryCatch(readxl::excel_sheets(fp), error=function(e) character(0))
rd <- function(sheet, ...) {
if (!(sheet %in% sheets)) return(NULL)
df <- safe_read(fp, sheet, ...) |> clean_cols()
if (!is.null(df) && nrow(df) == 0) return(NULL)
df
}
# ── YIELD ─────────────────────────────────────────────────────────────────
parse_yield <- function() {
df <- rd("Yield Summary")
if (is.null(df)) return(NULL)
df <- df |>
rename_first("^Farm *ID$|^Farm ID *$","Farm") |>
rename_first("^Farm *$","Farm") |>
rename_first("^Block ID *$","Block_ID") |>
rename_first("^Crop name$","Crop") |>
rename_first("^Farm name$","Farm") |>
rename_first("^Yield/acre$","Yield_per_acre")
yr_c <- year_cols_of(df)
# Float year-header fix (DCB: "2020.0" → Unnamed cols)
if (length(yr_c) == 0) {
first_row <- suppressWarnings(as.numeric(unlist(df[1, ])))
if (any(!is.na(first_row) & first_row >= 2010 & first_row <= 2035)) {
df_retry <- tryCatch(
suppressMessages(
readxl::read_excel(fp, sheet="Yield Summary", skip=1)) |> clean_cols(),
error=function(e) NULL)
if (!is.null(df_retry)) { df <- df_retry; yr_c <- year_cols_of(df) }
}
}
# Wide format → long (DCB aggregate row)
if (!("Yield" %in% names(df)) && length(yr_c) > 0) {
fc <- names(df)[1]
cc <- names(df)[grepl("crop", names(df), TRUE)][1]
uc <- names(df)[grepl("unit", names(df), TRUE)][1]
df <- df |>
mutate(across(all_of(yr_c), as.character)) |>
filter(!is.na(.data[[fc]]),
!grepl("entity|yield|lbs|acre", tolower(as.character(.data[[fc]])), TRUE)) |>
slice(1) |>
pivot_longer(cols=all_of(yr_c), names_to="Year", values_to="Yield") |>
mutate(Farm = farm,
Crop = if (!is.na(cc)) as.character(.data[[cc]]) else "Almonds",
Unit = if (!is.na(uc)) as.character(.data[[uc]]) else "lbs",
Variety = "All sub-farms (aggregate)")
}
df <- df |>
mutate(Year = suppressWarnings(as.integer(Year)),
Yield = suppressWarnings(as.numeric(Yield))) |>
filter(!is.na(Yield), !is.na(Year))
if (nrow(df) == 0) {
audit_log[[paste0(farm,"_yld_empty")]] <<- data.frame(
farm_id=farm, sheet="Yield Summary",
issue="Sheet present but no valid data rows",
action="Obtain yield records from farm", stringsAsFactors=FALSE)
return(NULL)
}
uc2 <- intersect(c("Unit","unit"), names(df))[1]
df <- df |> mutate(
data_quality = dplyr::case_when(
!is.na(uc2) & grepl("\\?", as.character(.data[[uc2]])) ~ "A — Unit uncertain (lbs?)",
!is.na(uc2) & grepl("(?i)^ton$|^t$", as.character(.data[[uc2]])) ~ "A — Unit=metric ton",
!is.na(uc2) & grepl("(?i)bu|bushel", as.character(.data[[uc2]])) ~ "A — Unit=BU/acre",
TRUE ~ "A — Source document"),
Notes = data_quality)
keep <- intersect(c("Farm","Year","Crop","Variety","Block_ID","Yield","Unit",
"Acres","Yield_per_acre","Source","data_quality","Notes"), names(df))
tag(df[, keep, drop=FALSE], farm, pai, fn, "Yield Summary")
}
# ── FUEL ──────────────────────────────────────────────────────────────────
parse_fuel <- function() {
results <- list()
df <- rd("Fuel Summary")
# Two-row header fix (B&R TeVelde, Ron Colburn: "Gal/Acre" in col names)
if (!is.null(df) && any(grepl("Gal.Acre|Gal/Acre", names(df), TRUE))) {
df_retry <- tryCatch(
suppressMessages(
readxl::read_excel(fp, sheet="Fuel Summary", skip=1)) |> clean_cols(),
error=function(e) df)
if (!is.null(df_retry)) df <- df_retry
}
if (!is.null(df)) {
yr_c <- year_cols_of(df)
has_qty <- any(grepl("qty|quantity", names(df), TRUE))
if (length(yr_c) > 0 && !has_qty) {
# ── WIDE FORMAT (Dyt, DCB) ─────────────────────────────────────────
fc <- names(df)[grepl("^farm$", names(df), TRUE)][1]
tc <- names(df)[grepl("fuel.*type$", names(df), TRUE)][1]
if (!is.na(fc) && !is.na(tc)) {
# Cast all year cols to character — prevents character/double mismatch
df <- df |> mutate(across(all_of(yr_c), as.character))
long1 <- df |>
filter(!is.na(.data[[fc]]),
!grepl("^total$|^na$|^nan$", tolower(as.character(.data[[fc]])), TRUE)) |>
select(Farm=all_of(fc), fuel_name=all_of(tc), all_of(yr_c)) |>
pivot_longer(yr_c, names_to="Year", values_to="qty_gal")
# Mirror Diesel block (DCB has .1-suffix duplicate columns)
fc2 <- names(df)[grepl("farm\\.", names(df), TRUE)][1]
tc2 <- names(df)[grepl("fuel.*type\\.", names(df), TRUE)][1]
yr_dot <- names(df)[grepl(paste0("^(",paste(yr_c,collapse="|"),")\\.\\d"), names(df))]
long2 <- if (!is.na(fc2) && !is.na(tc2) && length(yr_dot) > 0) {
yr_c_clean <- gsub("\\.\\d+$","", yr_dot)
df |>
mutate(across(all_of(yr_dot), as.character)) |>
filter(!is.na(.data[[fc2]]),
!grepl("^total$|^na$|^nan$", tolower(as.character(.data[[fc2]])), TRUE)) |>
select(Farm=all_of(fc2), fuel_name=all_of(tc2), all_of(yr_dot)) |>
rename_with(~gsub("\\.\\d+$","",.), all_of(yr_dot)) |>
pivot_longer(intersect(yr_c, yr_c_clean), names_to="Year", values_to="qty_gal")
} else NULL
# LPG rows (DCB)
long_lpg <- if (!is.na(tc) &&
any(grepl("LPG", toupper(as.character(df[[tc]])), fixed=TRUE))) {
df |>
filter(grepl("LPG", toupper(as.character(.data[[tc]])), fixed=TRUE)) |>
select(Farm=all_of(fc), fuel_name=all_of(tc), all_of(yr_c)) |>
pivot_longer(yr_c, names_to="Year", values_to="qty_gal") |>
mutate(fuel_name="LPG")
} else NULL
wide_long <- bind_rows(long1, long2, long_lpg) |>
filter(!is.na(qty_gal)) |>
mutate(qty_gal = suppressWarnings(as.numeric(qty_gal))) |>
filter(!is.na(qty_gal), qty_gal != 0) |>
mutate(Year = suppressWarnings(as.integer(Year)),
fuel_type = classify_fuel(fuel_name),
unit = "Gal",
gal_per_acre = NA_real_,
data_quality = "B — Annual summary (no individual invoice dates)")
results[["wide"]] <- tag(wide_long, farm, pai, fn, "Fuel Summary")
} else {
# ── GAL/ACRE FORMAT (B&R TeVelde, Ron Colburn) ────────────────────
tbl <- df |>
rename_first("^Unnamed.*0$|^Crop$","Crop") |>
filter(!is.na(Crop),
!grepl("source|note|gal|please|crop|^na$", tolower(Crop), TRUE)) |>
select(Crop, all_of(yr_c)) |>
pivot_longer(yr_c, names_to="Year", values_to="gal_per_acre") |>
mutate(Year = suppressWarnings(as.integer(Year)),
gal_per_acre = suppressWarnings(as.numeric(gal_per_acre)),
Farm=farm, fuel_type="Conventional Diesel",
fuel_name="Diesel", unit="Gal/Acre", qty_gal=NA_real_,
data_quality="E — Gal/Acre only; area not provided; qty not calculable") |>
filter(!is.na(gal_per_acre))
results[["galacre"]] <- tag(tbl, farm, pai, fn, "Fuel Summary")
}
} else {
# ── LONG FORMAT (Hettinga, Vander Dussen, MK Sawtooth, Eric Shannon)
df <- df |>
rename_first("^Fuel Name$", "fuel_name") |>
rename_first("^Fuel Type$", "fuel_type_raw") |>
rename_first("^Quantity$", "qty_gal") |>
rename_first("^Unit$", "unit") |>
rename_first("^SOURCE$|^Source$", "Source")
# Guard: if Year column is absent, log and skip rather than crash
if (!"Year" %in% names(df)) {
audit_log[[paste0(farm,"_fuel_no_year")]] <<- data.frame(
farm_id=farm, sheet="Fuel Summary",
issue="Year column absent — check for two-row header",
action="Inspect source file", stringsAsFactors=FALSE)
return(NULL)
}
# Resolve fuel name column before entering the pipe (names(.) unavailable with |>)
fuel_col <- if ("fuel_name" %in% names(df)) "fuel_name" else
if ("fuel_type_raw" %in% names(df)) "fuel_type_raw" else NULL
df <- df |>
filter(!is.na(qty_gal), !is.na(Year)) |>
mutate(Year = suppressWarnings(as.integer(Year)),
qty_gal = suppressWarnings(as.numeric(qty_gal)),
unit = gsub("(?i)gallon","Gal", as.character(unit)),
fuel_type = if (!is.null(fuel_col)) classify_fuel(.data[[fuel_col]])
else classify_fuel(NA_character_),
gal_per_acre = NA_real_,
data_quality = "A — Dated purchase/sales invoice")
keep <- intersect(c("Date","Year","fuel_name","fuel_type","qty_gal",
"gal_per_acre","unit","Source","data_quality"), names(df))
results[["long"]] <- tag(df[, keep, drop=FALSE], farm, pai, fn, "Fuel Summary")
}
}
# ── HANDWRITTEN FUEL SUMMARY (Eric Shannon only) ───────────────────────
df_hw <- rd("Handwritten Fuel Summary")
if (!is.null(df_hw)) {
yr_hw <- year_cols_of(df_hw)
fixed <- df_hw |>
slice(1:5) |>
rename_first("Ranch|Unnamed.*0","Ranch") |>
rename_first("Area.*acres","Acres") |>
rename_first("Gal.Acre","GalAcre")
hw_rows <- list()
for (i in seq_len(nrow(fixed))) {
ranch <- as.character(fixed$Ranch[i])
if (is.na(ranch) || ranch == "NA") next
acres <- suppressWarnings(as.numeric(fixed$Acres[i]))
gpa <- suppressWarnings(as.numeric(fixed$GalAcre[i]))
base <- NA_real_
for (yr in sort(yr_hw, decreasing=TRUE)) {
v <- suppressWarnings(as.numeric(as.character(fixed[[as.character(yr)]][i])))
if (!is.na(v)) base <- v else v <- base
hw_rows[[length(hw_rows)+1]] <- data.frame(
Farm=paste(farm,"—",ranch), Year=as.integer(yr),
Acres=acres, gal_per_acre=gpa, qty_gal=v,
fuel_type="Conventional Diesel", unit="Gal",
data_quality="D — Estimated gal/acre × acres (handwritten)",
stringsAsFactors=FALSE)
}
}
# South Cairns block (dynamic detection)
sc_idx <- which(grepl("SOUTH CAIRNS", toupper(as.character(df_hw[[1]]))))
if (length(sc_idx) > 0) {
blk <- df_hw[sc_idx[1]:(sc_idx[1]+5), ]
yr_r <- blk[grepl("year", tolower(as.character(blk[[1]])), TRUE), ]
qt_r <- blk[grepl("consumpt",tolower(as.character(blk[[1]])), TRUE), ]
if (nrow(yr_r) > 0 && nrow(qt_r) > 0) {
yrs <- suppressWarnings(as.integer(unlist(yr_r[1, 2:5])))
qtys <- suppressWarnings(as.numeric(unlist(qt_r[1, 2:5])))
for (j in seq_along(yrs))
if (!is.na(yrs[j]) && !is.na(qtys[j]))
hw_rows[[length(hw_rows)+1]] <- data.frame(
Farm=paste(farm,"— South Cairns"), Year=yrs[j],
Acres=394.6, gal_per_acre=NA_real_, qty_gal=qtys[j],
fuel_type="Conventional Diesel", unit="Gal",
data_quality="B — Handwritten fuel usage estimate",
stringsAsFactors=FALSE)
}
}
if (length(hw_rows) > 0) {
hw_df <- bind_rows(hw_rows) |> filter(!is.na(qty_gal))
results[["handwritten"]] <- tag(hw_df, farm, pai, fn, "Handwritten Fuel Summary")
}
}
if (length(results) == 0) {
audit_log[[paste0(farm,"_fuel_absent")]] <<- data.frame(
farm_id=farm, sheet="Fuel",
issue="No fuel sheet found in source file",
action="Request fuel records from farm", stringsAsFactors=FALSE)
return(NULL)
}
bind_rows(results)
}
# ── FERTILISER ────────────────────────────────────────────────────────────
parse_fert <- function() {
df <- coalesce_first(rd("Fertiliser Summary"), rd("Fertilizer Summary"))
if (is.null(df)) {
audit_log[[paste0(farm,"_fert_absent")]] <<- data.frame(
farm_id=farm, sheet="Fertiliser",
issue="No Fertiliser Summary sheet in source file",
action="Request fertiliser records from farm", stringsAsFactors=FALSE)
return(NULL)
}
df <- df |>
rename_first("Farm.ID|Farm ID", "Farm") |>
rename_first("^N%$", "N_pct") |>
rename_first("Note.*Comment", "Notes") |>
rename_first("^Consumption", "Consumption") |>
rename_first("Rate.Acre", "Rate_per_acre") |>
rename_first("Qty.*\\(kg\\)", "qty_kg") |>
rename_first("Qty N", "qty_n_kg") |>
rename_first("(?i)^year$", "Year") # normalise case variants
if (!"Year" %in% names(df)) {
audit_log[[paste0(farm,"_fert_no_year")]] <<- data.frame(
farm_id=farm, sheet="Fertiliser Summary",
issue="Year column not found — check sheet structure",
action="Inspect source file", stringsAsFactors=FALSE)
return(NULL)
}
if (!"Fertilizer" %in% names(df)) {
audit_log[[paste0(farm,"_fert_no_fert")]] <<- data.frame(
farm_id=farm, sheet="Fertiliser Summary",
issue="Fertilizer column not found — check sheet structure",
action="Inspect source file", stringsAsFactors=FALSE)
return(NULL)
}
df <- df |>
filter(!is.na(Fertilizer), !is.na(Year)) |>
mutate(Year = suppressWarnings(as.integer(Year)),
N_pct = suppressWarnings(as.numeric(N_pct)))
if (nrow(df) == 0) return(NULL)
notes_v <- if ("Notes" %in% names(df)) df$Notes else rep(NA_character_, nrow(df))
df <- df |> mutate(
data_quality = dplyr::case_when(
grepl("sawtooth|mk", tolower(farm), TRUE) ~ "D — Sold qty (not confirmed consumption)",
!is.na(notes_v) & grepl("confirm blend|customized",
tolower(as.character(notes_v)), TRUE) ~ "C — Blend composition unconfirmed",
!is.na(notes_v) & grepl("name\\?|unit\\?",
tolower(as.character(notes_v)), TRUE) ~ "C — Name or unit unconfirmed",
!is.na(notes_v) & grepl("n% not avail",
tolower(as.character(notes_v)), TRUE) ~ "D — N% unavailable",
grepl("tevelde|colburn", tolower(farm), TRUE) ~ "E — Rate/acre only; area not provided",
TRUE ~ "A — Source document"
)
)
keep <- intersect(
c("Farm","Year","Date","Fertilizer","Unit","N_pct","Consumption","qty_kg",
"qty_n_kg","Rate_per_acre","Category","Name","Block_ID","Notes",
"Source","data_quality"),
names(df))
# Append Sheet5 (MK Sawtooth 2025 raw invoice)
df5 <- rd("Sheet5")
if (!is.null(df5) && ncol(df5) >= 5) {
names(df5)[1:5] <- c("Date","Fertilizer","Consumption","Unit","N_pct")
df5 <- df5 |>
filter(!is.na(Fertilizer), !is.na(Date)) |>
mutate(Year=2025L, Farm=farm, data_quality="D — Sold qty 2025 raw invoice")
df <- bind_rows(df[, keep, drop=FALSE],
df5[, intersect(keep, names(df5)), drop=FALSE])
} else {
df <- df[, keep, drop=FALSE]
}
tag(df, farm, pai, fn, "Fertiliser Summary")
}
# ── Dispatch & store
y <- parse_yield()
f <- parse_fuel()
r <- parse_fert()
if (!is.null(y) && is.data.frame(y) && nrow(y) > 0) yield_list[[farm]] <<- y
if (!is.null(f) && is.data.frame(f) && nrow(f) > 0) fuel_list[[farm]] <<- f
if (!is.null(r) && is.data.frame(r) && nrow(r) > 0) fert_list[[farm]] <<- r
data.frame(PAI=pai, Farm=farm,
Yield = if (!is.null(y) && is.data.frame(y) && nrow(y)>0) nrow(y) else 0L,
Fuel = if (!is.null(f) && is.data.frame(f) && nrow(f)>0) nrow(f) else 0L,
Fert = if (!is.null(r) && is.data.frame(r) && nrow(r)>0) nrow(r) else 0L,
stringsAsFactors=FALSE)
}
# =============================================================================
# ── 5. RUN ALL FARMS ────────────────────────────────────────────────────────
# =============================================================================
parse_summary <- bind_rows(Map(parse_farm,
FILE_REGISTRY$filepath,
FILE_REGISTRY$filename,
FILE_REGISTRY$farm_id,
FILE_REGISTRY$pai_id))
# =============================================================================
# ── 6. BUILD MASTER TABLES ──────────────────────────────────────────────────
# =============================================================================
master_fuel <- bind_rows(fuel_list) |>
mutate(Year = suppressWarnings(as.integer(Year)),
qty_gal = suppressWarnings(as.numeric(qty_gal)),
data_tier = tier_letter(data_quality),
u_ad_pct = tier_u_ad(data_tier)) |>
arrange(pai_id, Year, fuel_type)
master_yield <- bind_rows(yield_list) |>
mutate(Year = suppressWarnings(as.integer(Year)),
Yield = suppressWarnings(as.numeric(Yield))) |>
arrange(pai_id, Year)
master_fert <- bind_rows(fert_list) |>
mutate(Year = suppressWarnings(as.integer(Year)),
N_pct = suppressWarnings(as.numeric(N_pct)),
data_tier = tier_letter(data_quality),
u_ad_pct = tier_u_ad(data_tier)) |>
arrange(pai_id, Year, Fertilizer)
# =============================================================================
# ── 7. DATA FLAGS ───────────────────────────────────────────────────────────
# =============================================================================
manual_flags <- bind_rows(lapply(audit_log, as.data.frame))
auto_fuel_flags <- master_fuel |>
filter(grepl("Gal/Acre|rate|sold", data_quality, ignore.case=TRUE)) |>
distinct(farm_id, source_sheet, data_quality) |>
rename(sheet=source_sheet, issue=data_quality) |>
mutate(action="Review with IQC before use in emissions model")
auto_fert_flags <- master_fert |>
filter(grepl("rate|sold|N%", data_quality, ignore.case=TRUE)) |>
distinct(farm_id, source_sheet, data_quality) |>
rename(sheet=source_sheet, issue=data_quality) |>
mutate(action="Resolve before PAI calculation")
master_flags <- bind_rows(manual_flags, auto_fuel_flags, auto_fert_flags) |>
distinct() |>
arrange(farm_id, sheet)
# =============================================================================
# ── 8. STRUCTURE AUDIT ──────────────────────────────────────────────────────
# =============================================================================
structure_audit <- FILE_REGISTRY |>
select(pai_id, farm_id, source_filename=filename) |>
left_join(master_fuel |> count(farm_id, name="fuel_rows"), by="farm_id") |>
left_join(master_yield |> count(farm_id, name="yield_rows"), by="farm_id") |>
left_join(master_fert |> count(farm_id, name="fert_rows"), by="farm_id") |>
replace_na(list(fuel_rows=0L, yield_rows=0L, fert_rows=0L)) |>
mutate(
extra_sheets = mapply(function(fp_val) {
known <- c("Yield Summary","Fuel Summary","Fertiliser Summary","Fertilizer Summary",
"Handwritten Fuel Summary","Sheet5","Sheet4","Sheet1","Sheet1 (2)","Sheet2",
"Fertiliser data 2020-25","DATA CAPTURE TEMPLATE")
extra <- setdiff(tryCatch(readxl::excel_sheets(fp_val), error=function(e)""), known)
if (length(extra)==0) "—" else paste(extra, collapse="; ")
}, FILE_REGISTRY$filepath),
data_gaps = dplyr::case_when(
yield_rows==0 & fert_rows==0 ~ "⚠ Yield and Fert absent",
yield_rows==0 ~ "⚠ Yield absent",
fert_rows ==0 ~ "⚠ Fert absent",
TRUE ~ "✓ OK")
)
# =============================================================================
# ── 9. EXPORT MASTER EXCEL ──────────────────────────────────────────────────
# =============================================================================
if (params$write_excel) {
out_path <- file.path(OUTPUT_DIR, "SAS_4842_Master_Farm_Inputs.xlsx")
# Ensure OUTPUT folder exists (recursive = TRUE handles nested paths)
if (!dir.create(OUTPUT_DIR, showWarnings=FALSE, recursive=TRUE) &&
!dir.exists(OUTPUT_DIR)) {
stop("Cannot create OUTPUT directory: '",
normalizePath(OUTPUT_DIR, mustWork=FALSE), "'.\n",
"Check that you have write permission to that location.", call.=FALSE)
}
# If a previous version of the file is open in Excel, delete it first.
# On Windows, an open .xlsx is locked — write_xlsx fails with a permissions
# error even though you own the file. Removing it before writing sidesteps
# the lock (the open Excel window will show a "file not found" notice but
# will not lose the data it already has loaded).
if (file.exists(out_path)) {
del_ok <- tryCatch({ file.remove(out_path); TRUE },
warning = function(w) FALSE,
error = function(e) FALSE)
if (!del_ok) {
stop(
"Cannot overwrite: '", normalizePath(out_path), "'\n\n",
"The file is most likely open in Excel.\n",
" → Close Excel (or just the file), then re-render.\n\n",
"Alternative: change the output filename in the params:\n",
" rmarkdown::render(\"your.Rmd\",\n",
" params = list(output_dir = \"OUTPUT_v2\"))",
call.=FALSE)
}
}
write_ok <- tryCatch({
writexl::write_xlsx(
list("STRUCTURE AUDIT" = structure_audit,
"FUEL MASTER" = master_fuel,
"YIELD MASTER" = master_yield,
"FERTILISER MASTER" = master_fert,
"DATA FLAGS" = master_flags),
path = out_path
)
TRUE
}, error = function(e) {
stop(
"write_xlsx failed: ", conditionMessage(e), "\n\n",
"Attempted path : ", normalizePath(out_path, mustWork=FALSE), "\n",
"OUTPUT dir : ", normalizePath(OUTPUT_DIR), "\n",
"Dir writable? : ", file.access(OUTPUT_DIR, mode=2) == 0, "\n\n",
"Common fixes:\n",
" • Close the file in Excel if it is open (Windows file lock)\n",
" • Check write permissions on: ", normalizePath(OUTPUT_DIR), "\n",
" • Pass a different output_dir param if the default is read-only:\n",
" rmarkdown::render(\"your.Rmd\",\n",
" params = list(output_dir = \"/writable/path/OUTPUT\"))",
call.=FALSE)
})
}FILE_REGISTRY |>
select(PAI=pai_id, Farm=farm_id, `Source file`=filename) |>
fmt_tbl(caption="Files matched to farms")| PAI | Farm | Source file |
|---|---|---|
| 1 | Dyt Farms | 4842 Dyt Farms Summary Input.xlsx |
| 2 | Hettinga Farms | 4842 Hettinga Farms Summary Input.xlsx |
| 3 | Mineral King Sawtooth | 4842 MK Sawtooth Summary Input.xlsx |
| 4 | Vander Dussen | 4842 Vander Dussen Summary Input.xlsx |
| 5 | DCB Farming | 4842 DCB Farming Summary Input.xlsx |
| 6 | Eric Shannon Farming | 4842 Eric Shannon Farming Summary Input.xlsx |
| 7 | B&R TeVelde | 4842 B&R Tevelde Summary Input.xlsx |
| 8 | Ron Colburn | 4842 Ron Colburn Summary Input.xlsx |
if (length(unmatched) > 0) {
cat("::: {.warn-box}\n**Unmatched files** (skipped):\n\n")
cat(paste0("- `", unmatched, "`\n")); cat(":::\n")
} else {
cat("::: {.ok-box}\nAll files in DATA/ matched to a farm.\n:::\n")
}All files in DATA/ matched to a farm.
parse_summary |>
mutate(across(c(Yield,Fuel,Fert),
~cell_spec(.x, color=ifelse(.x==0,"#e74c3c","#2c3e50")))) |>
kbl(escape=FALSE, caption="Rows extracted per farm and data type") |>
kable_styling(bootstrap_options=c("striped","hover","condensed"),
full_width=FALSE, font_size=12) |>
row_spec(0, bold=TRUE, background="#2c3e50", color="white") |>
column_spec(3:5, width="80px")| PAI | Farm | Yield | Fuel | Fert |
|---|---|---|---|---|
| 1 | Dyt Farms | 5 | 7 | 43 |
| 2 | Hettinga Farms | 1 | 0 | 243 |
| 3 | Mineral King Sawtooth | 0 | 48 | 419 |
| 4 | Vander Dussen | 1 | 9 | 0 |
| 5 | DCB Farming | 5 | 0 | 196 |
| 6 | Eric Shannon Farming | 1 | 61 | 166 |
| 7 | B&R TeVelde | 0 | 28 | 161 |
| 8 | Ron Colburn | 0 | 33 | 0 |
tibble(
Table = c("FUEL MASTER","YIELD MASTER","FERTILISER MASTER"),
Rows = c(nrow(master_fuel), nrow(master_yield), nrow(master_fert)),
Columns = c(ncol(master_fuel), ncol(master_yield), ncol(master_fert)),
Farms = c(n_distinct(master_fuel$farm_id), n_distinct(master_yield$farm_id),
n_distinct(master_fert$farm_id)),
Years = c(paste(range(master_fuel$Year, na.rm=TRUE), collapse="–"),
paste(range(master_yield$Year, na.rm=TRUE), collapse="–"),
paste(range(master_fert$Year, na.rm=TRUE), collapse="–"))
) |> fmt_tbl(caption="Master table dimensions")| Table | Rows | Columns | Farms | Years |
|---|---|---|---|---|
| FUEL MASTER | 186 | 18 | 6 | 2020–2025 |
| YIELD MASTER | 13 | 12 | 5 | 2019–2024 |
| FERTILISER MASTER | 1228 | 19 | 6 | 2020–2025 |
tibble(
Tier=c("A","B","C","D","E","F"), `U_AD %`=c(5,10,15,25,50,75),
Description=c("Invoice/receipt; confirmed composition",
"Source document present; minor unit ambiguity",
"Blend/composition needs confirmation",
"Sold quantity (not confirmed consumption); estimated N%",
"Missing N%; rate/acre only; unconfirmed composition",
"Unknown unit; no source document")
) |>
kbl(caption="Data quality tier definitions") |>
kable_styling(bootstrap_options=c("striped","hover","condensed"),
full_width=FALSE, font_size=12) |>
row_spec(0, bold=TRUE, background="#2c3e50", color="white") |>
column_spec(1, bold=TRUE, background=TIER_COLS[c("A","B","C","D","E","F")], color="white")| Tier | U_AD % | Description |
|---|---|---|
| A | 5 | Invoice/receipt; confirmed composition |
| B | 10 | Source document present; minor unit ambiguity |
| C | 15 | Blend/composition needs confirmation |
| D | 25 | Sold quantity (not confirmed consumption); estimated N% |
| E | 50 | Missing N%; rate/acre only; unconfirmed composition |
| F | 75 | Unknown unit; no source document |
master_fuel |>
count(farm_id, data_tier) |>
mutate(farm_id = factor(farm_id, levels=rev(sort(unique(farm_id)))),
data_tier = factor(data_tier, levels=c("A","B","C","D","E","F"))) |>
ggplot(aes(x=n, y=farm_id, fill=data_tier)) +
geom_col(position="stack", width=0.65) +
scale_fill_manual(values=TIER_COLS, name="Tier", guide=guide_legend(reverse=TRUE)) +
scale_x_continuous(expand=expansion(mult=c(0,0.05))) +
labs(x="Records", y=NULL, title="Fuel Records by Data Quality Tier") +
theme_minimal(base_size=12) +
theme(panel.grid.major.y=element_blank(), legend.position="right")Fuel records by data quality tier per farm
master_fert |>
count(farm_id, data_tier) |>
mutate(farm_id = factor(farm_id, levels=rev(sort(unique(farm_id)))),
data_tier = factor(data_tier, levels=c("A","B","C","D","E","F"))) |>
ggplot(aes(x=n, y=farm_id, fill=data_tier)) +
geom_col(position="stack", width=0.65) +
scale_fill_manual(values=TIER_COLS, name="Tier", guide=guide_legend(reverse=TRUE)) +
scale_x_continuous(expand=expansion(mult=c(0,0.05))) +
labs(x="Records", y=NULL, title="Fertiliser Records by Data Quality Tier") +
theme_minimal(base_size=12) +
theme(panel.grid.major.y=element_blank(), legend.position="right")Fertiliser records by data quality tier per farm
fuel_cov <- master_fuel |> filter(!is.na(Year)) |> count(farm_id, Year) |>
mutate(farm_id=factor(farm_id, levels=names(FARM_COLS)), type="Fuel")
fert_cov <- master_fert |> filter(!is.na(Year)) |> count(farm_id, Year) |>
mutate(farm_id=factor(farm_id, levels=names(FARM_COLS)), type="Fertiliser")
bind_rows(fuel_cov, fert_cov) |>
ggplot(aes(x=Year, y=farm_id, fill=farm_id, size=n)) +
geom_point(shape=21, colour="white", stroke=0.4) +
facet_wrap(~type) +
scale_fill_manual(values=FARM_COLS, guide="none") +
scale_size_area(max_size=12, name="Records") +
scale_x_continuous(breaks=2019:2025) +
labs(x=NULL, y=NULL) +
theme_minimal(base_size=11) +
theme(panel.grid.minor=element_blank(),
strip.background=element_rect(fill="#2c3e50"),
strip.text=element_text(colour="white", face="bold"))Record coverage by farm and year (bubble = record count)
master_fuel |>
filter(!is.na(qty_gal), !is.na(Year), data_tier %in% c("A","B")) |>
group_by(farm_id, Year, fuel_type) |>
summarise(total_gal=sum(qty_gal, na.rm=TRUE), .groups="drop") |>
ggplot(aes(x=Year, y=total_gal, fill=fuel_type)) +
geom_col(width=0.7) +
facet_wrap(~farm_id, scales="free_y", ncol=2) +
scale_fill_brewer(palette="Set2", name="Fuel type") +
scale_y_continuous(labels=scales::comma) +
scale_x_continuous(breaks=2019:2025) +
labs(x=NULL, y="Gallons",
title="Annual Fuel Use by Farm",
caption="Farms with Gal/Acre-only data (B&R TeVelde, Ron Colburn) excluded") +
theme_minimal(base_size=11) +
theme(strip.background=element_rect(fill="#2c3e50"),
strip.text=element_text(colour="white", face="bold"),
panel.grid.minor=element_blank())Annual fuel consumption — invoice-backed records only
n_open <- nrow(master_flags)
if (n_open == 0) {
cat("::: {.ok-box}\nNo data flags raised.\n:::\n")
} else {
cat(sprintf("::: {.alert-box}\n**%d flag(s)** require attention before IQC sign-off.\n:::\n", n_open))
DT::datatable(
master_flags,
caption="Open data flags — resolve before PAI calculation",
filter="top", extensions="Buttons",
options=list(pageLength=15, scrollX=TRUE, dom="Bfrtip", buttons=c("csv","excel")),
rownames=FALSE) |>
DT::formatStyle("farm_id",
backgroundColor=DT::styleEqual(names(FARM_COLS), unname(FARM_COLS)),
color="white", fontWeight="bold")
}11 flag(s) require attention before IQC sign-off.
structure_audit |>
select(-source_filename) |>
kbl(caption="Structure audit — one row per farm") |>
kable_styling(bootstrap_options=c("striped","hover","condensed"),
full_width=TRUE, font_size=12) |>
row_spec(0, bold=TRUE, background="#2c3e50", color="white") |>
column_spec(7,
color=ifelse(grepl("✓", structure_audit$data_gaps),"#27ae60","#e74c3c"),
bold=TRUE)| pai_id | farm_id | fuel_rows | yield_rows | fert_rows | extra_sheets | data_gaps |
|---|---|---|---|---|---|---|
| 1 | Dyt Farms | 7 | 5 | 43 | — | ✓ OK |
| 2 | Hettinga Farms | 0 | 1 | 243 | — | ✓ OK |
| 3 | Mineral King Sawtooth | 48 | 0 | 419 | — | ⚠ Yield absent |
| 4 | Vander Dussen | 9 | 1 | 0 | — | ⚠ Fert absent |
| 5 | DCB Farming | 0 | 5 | 196 | — | ✓ OK |
| 6 | Eric Shannon Farming | 61 | 1 | 166 | — | ✓ OK |
| 7 | B&R TeVelde | 28 | 0 | 161 | — | ⚠ Yield absent |
| 8 | Ron Colburn | 33 | 0 | 0 | — | ⚠ Yield and Fert absent |
if (params$write_excel) {
out_path <- file.path(OUTPUT_DIR, "SAS_4842_Master_Farm_Inputs.xlsx")
cat(sprintf(paste0("::: {.ok-box}\n**Master Excel written:** `%s`\n\n",
"Sheets: STRUCTURE AUDIT | FUEL MASTER | YIELD MASTER | FERTILISER MASTER | DATA FLAGS\n\n",
"Totals: **%d** fuel rows | **%d** yield rows | **%d** fert rows | **%d** flags\n:::"),
normalizePath(out_path, mustWork=FALSE),
nrow(master_fuel), nrow(master_yield), nrow(master_fert), nrow(master_flags)))
} else {
cat("::: {.info-box}\n`write_excel = FALSE` — Excel export skipped (report-only mode).\n:::\n")
}Master Excel written:
C:\Users\Quan\Downloads\OUTPUT\SAS_4842_Master_Farm_Inputs.xlsx
Sheets: STRUCTURE AUDIT | FUEL MASTER | YIELD MASTER | FERTILISER MASTER | DATA FLAGS
Totals: 186 fuel rows | 13 yield rows | 1228 fert rows | 11 flags
R : R version 4.4.1 (2024-06-14 ucrt)
Platform : x86_64-w64-mingw32
Rendered : 2026-03-30 09:36:12 AEST
DATA dir : C:\Users\Quan\Downloads\DATA
pi <- installed.packages()[pkgs, c("Package","Version"), drop=FALSE]
cat(paste(sprintf(" %-16s %s", pi[,"Package"], pi[,"Version"]), collapse="\n")) readxl 1.4.5
writexl 1.5.4
dplyr 1.1.4
tidyr 1.3.1
stringr 1.5.1
knitr 1.50
kableExtra 1.4.0
ggplot2 4.0.0
scales 1.4.0
DT 0.34.0