library(jsonlite)
library(readr)
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
soya_seasonB_CCseasonC.apsimx located in apsimx_runs folder. This is the base I need to modify and then is multiplied to 200 points for soil and weather
library(jsonlite)
# Load existing APSIMX JSON file
apsim_path <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC.apsimx"
apsim_json <- fromJSON(apsim_path, simplifyVector = FALSE)
# Collect existing simulation names
existing_names <- sapply(apsim_json$Children, function(x) x$Name)
# Use the first simulation as the base template
template <- apsim_json$Children[[which(existing_names == "Soybean_point1")]]
# Create and append new simulations (only if names don't exist)
for (i in 2:200) {
new_name <- paste0("Soybean_point", i)
# Skip if this name already exists
if (new_name %in% existing_names) next
new_sim <- template
new_sim$Name <- new_name
# Update weather file path
for (child in new_sim$Children) {
if (!is.null(child$`$type`) && child$`$type` == "Models.Climate.Weather, Models") {
child$FileName <- paste0("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/weather/point", i, ".met")
}
}
# Append to APSIM JSON structure
apsim_json$Children <- append(apsim_json$Children, list(new_sim))
}
# Write to new .apsimx file
output_file <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC_1.apsimx"
write(toJSON(apsim_json, auto_unbox = TRUE, pretty = TRUE, null = "null"), output_file)
for each to have the specific weather and soils data from the site
library(jsonlite)
library(stringr)
library(dplyr)
# File paths
apsimx_file <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC_1.apsimx"
apsimx_out <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.apsimx"
par_dir <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/soils_par"
weather_dir <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/weather"
# Update soil node - CORRECTED to use lists
update_soil_from_par <- function(existing_soil, par_path) {
if (is.null(existing_soil) || is.null(existing_soil$Children)) {
warning("existing_soil is NULL")
return(existing_soil)
}
if (!file.exists(par_path)) {
cat(" ❌ PAR FILE NOT FOUND:", par_path, "\n")
return(existing_soil)
}
lines <- readLines(par_path)
data <- lines[!str_starts(lines, "\\*") & str_detect(lines, "=")]
if (length(data) == 0) {
cat(" ❌ NO PARAMETERS FOUND!\n")
return(existing_soil)
}
data <- str_split_fixed(data, "=", 2)
df <- as.data.frame(data, stringsAsFactors = FALSE)
colnames(df) <- c("key", "value")
df <- df %>% mutate(key = tolower(trimws(key)), value = trimws(value))
get_val <- function(k) df$value[df$key == tolower(k)]
# CRITICAL FIX: Convert to LIST, not vector
parse_num_vec <- function(k) {
if (tolower(k) %in% df$key) {
vals <- as.numeric(unlist(str_split(get_val(k), ",")))
return(as.list(vals)) # ← RETURNS LIST!
} else {
return(NULL)
}
}
# Find Physical
physical <- NULL
for (child in existing_soil$Children) {
if (!is.null(child$Name) && child$Name == "Physical") {
physical <- child
break
}
}
if (!is.null(physical)) {
if (!is.null(v <- parse_num_vec("dlayer"))) physical$Thickness <- v
if (!is.null(v <- parse_num_vec("bd"))) physical$BD <- v
if (!is.null(v <- parse_num_vec("airdry"))) physical$AirDry <- v
if (!is.null(v <- parse_num_vec("ll15"))) physical$LL15 <- v
if (!is.null(v <- parse_num_vec("dul"))) physical$DUL <- v
if (!is.null(v <- parse_num_vec("sat"))) physical$SAT <- v
if (!is.null(v <- parse_num_vec("ks"))) physical$KS <- v
sand <- parse_num_vec("sand")
clay <- parse_num_vec("clay")
if (!is.null(sand)) physical$ParticleSizeSand <- sand
if (!is.null(clay)) physical$ParticleSizeClay <- clay
if (!is.null(sand) && !is.null(clay)) {
silt_vals <- as.list(100 - unlist(sand) - unlist(clay))
silt_vals <- lapply(silt_vals, function(x) max(x, 0))
physical$ParticleSizeSilt <- silt_vals
}
# Update Physical back
for (idx in seq_along(existing_soil$Children)) {
if (!is.null(existing_soil$Children[[idx]]$Name) &&
existing_soil$Children[[idx]]$Name == "Physical") {
existing_soil$Children[[idx]] <- physical
break
}
}
}
# Find and update Organic
organic <- NULL
for (child in existing_soil$Children) {
if (!is.null(child$Name) && child$Name == "Organic") {
organic <- child
break
}
}
if (!is.null(organic)) {
if (!is.null(v <- parse_num_vec("carbon"))) {
organic$Carbon <- v
organic$Thickness <- parse_num_vec("dlayer")
for (idx in seq_along(existing_soil$Children)) {
if (!is.null(existing_soil$Children[[idx]]$Name) &&
existing_soil$Children[[idx]]$Name == "Organic") {
existing_soil$Children[[idx]] <- organic
break
}
}
}
}
# Find and update Chemical
chemical <- NULL
for (child in existing_soil$Children) {
if (!is.null(child$Name) && child$Name == "Chemical") {
chemical <- child
break
}
}
if (!is.null(chemical)) {
if (!is.null(v <- parse_num_vec("ph"))) chemical$PH <- v
if (!is.null(v <- parse_num_vec("cec"))) chemical$CEC <- v
if (!is.null(v <- parse_num_vec("ec"))) chemical$EC <- v
if (!is.null(v <- parse_num_vec("dlayer"))) chemical$Thickness <- v
for (idx in seq_along(existing_soil$Children)) {
if (!is.null(existing_soil$Children[[idx]]$Name) &&
existing_soil$Children[[idx]]$Name == "Chemical") {
existing_soil$Children[[idx]] <- chemical
break
}
}
}
# Update metadata
if (!is.null(v <- get_val("latitude"))) existing_soil$Latitude <- as.numeric(v)
if (!is.null(v <- get_val("longitude"))) existing_soil$Longitude <- as.numeric(v)
if (!is.null(v <- get_val("site"))) existing_soil$Site <- v
if (!is.null(v <- get_val("country"))) existing_soil$Country <- v
return(existing_soil)
}
# Load APSIMX
cat("Loading APSIMX file...\n")
## Loading APSIMX file...
apsim_json <- fromJSON(apsimx_file, simplifyVector = FALSE)
cat("✓ Loaded", length(apsim_json$Children), "children\n\n")
## ✓ Loaded 201 children
# Update all simulations
updates_made <- 0
for (i in 2:length(apsim_json$Children)) {
sim <- apsim_json$Children[[i]]
if (is.null(sim$`$type`) || sim$`$type` != "Models.Core.Simulation, Models") {
next
}
sim_name <- sim$Name
point_id <- str_extract(sim_name, "point\\d+")
if (is.na(point_id)) {
cat("❌ Could not extract point ID from:", sim_name, "\n")
next
}
cat("Processing:", sim_name, "\n")
par_file <- file.path(par_dir, paste0(point_id, ".par"))
weather_file <- file.path(weather_dir, paste0(point_id, ".met"))
# Update Weather
for (j in seq_along(sim$Children)) {
if (!is.null(sim$Children[[j]]$`$type`) &&
sim$Children[[j]]$`$type` == "Models.Climate.Weather, Models") {
sim$Children[[j]]$FileName <- weather_file
}
}
# Update Soil
soil_updated <- FALSE
for (j in seq_along(sim$Children)) {
if (!is.null(sim$Children[[j]]$Name) && sim$Children[[j]]$Name == "Field") {
field <- sim$Children[[j]]
for (k in seq_along(field$Children)) {
if (!is.null(field$Children[[k]]$Name) && field$Children[[k]]$Name == "Soil") {
existing_soil <- field$Children[[k]]
updated_soil <- update_soil_from_par(existing_soil, par_file)
field$Children[[k]] <- updated_soil
soil_updated <- TRUE
sim$Children[[j]] <- field
break
}
}
if (soil_updated) break
}
}
apsim_json$Children[[i]] <- sim
updates_made <- updates_made + 1
if (updates_made %% 50 == 0) cat(" ✓", updates_made, "completed\n")
}
## Processing: Soybean_point1
## Processing: Soybean_point2
## Processing: Soybean_point3
## Processing: Soybean_point4
## Processing: Soybean_point5
## Processing: Soybean_point6
## Processing: Soybean_point7
## Processing: Soybean_point8
## Processing: Soybean_point9
## Processing: Soybean_point10
## Processing: Soybean_point11
## Processing: Soybean_point12
## Processing: Soybean_point13
## Processing: Soybean_point14
## Processing: Soybean_point15
## Processing: Soybean_point16
## Processing: Soybean_point17
## Processing: Soybean_point18
## Processing: Soybean_point19
## Processing: Soybean_point20
## Processing: Soybean_point21
## Processing: Soybean_point22
## Processing: Soybean_point23
## Processing: Soybean_point24
## Processing: Soybean_point25
## Processing: Soybean_point26
## Processing: Soybean_point27
## Processing: Soybean_point28
## Processing: Soybean_point29
## Processing: Soybean_point30
## Processing: Soybean_point31
## Processing: Soybean_point32
## Processing: Soybean_point33
## Processing: Soybean_point34
## Processing: Soybean_point35
## Processing: Soybean_point36
## Processing: Soybean_point37
## Processing: Soybean_point38
## Processing: Soybean_point39
## Processing: Soybean_point40
## Processing: Soybean_point41
## Processing: Soybean_point42
## Processing: Soybean_point43
## Processing: Soybean_point44
## Processing: Soybean_point45
## Processing: Soybean_point46
## Processing: Soybean_point47
## Processing: Soybean_point48
## Processing: Soybean_point49
## Processing: Soybean_point50
## ✓ 50 completed
## Processing: Soybean_point51
## Processing: Soybean_point52
## Processing: Soybean_point53
## Processing: Soybean_point54
## Processing: Soybean_point55
## Processing: Soybean_point56
## Processing: Soybean_point57
## Processing: Soybean_point58
## Processing: Soybean_point59
## Processing: Soybean_point60
## Processing: Soybean_point61
## Processing: Soybean_point62
## Processing: Soybean_point63
## Processing: Soybean_point64
## Processing: Soybean_point65
## Processing: Soybean_point66
## Processing: Soybean_point67
## Processing: Soybean_point68
## Processing: Soybean_point69
## Processing: Soybean_point70
## Processing: Soybean_point71
## Processing: Soybean_point72
## Processing: Soybean_point73
## Processing: Soybean_point74
## Processing: Soybean_point75
## Processing: Soybean_point76
## Processing: Soybean_point77
## Processing: Soybean_point78
## Processing: Soybean_point79
## Processing: Soybean_point80
## Processing: Soybean_point81
## Processing: Soybean_point82
## Processing: Soybean_point83
## Processing: Soybean_point84
## Processing: Soybean_point85
## Processing: Soybean_point86
## Processing: Soybean_point87
## Processing: Soybean_point88
## Processing: Soybean_point89
## Processing: Soybean_point90
## Processing: Soybean_point91
## Processing: Soybean_point92
## Processing: Soybean_point93
## Processing: Soybean_point94
## Processing: Soybean_point95
## Processing: Soybean_point96
## Processing: Soybean_point97
## Processing: Soybean_point98
## Processing: Soybean_point99
## Processing: Soybean_point100
## ✓ 100 completed
## Processing: Soybean_point101
## Processing: Soybean_point102
## Processing: Soybean_point103
## Processing: Soybean_point104
## Processing: Soybean_point105
## Processing: Soybean_point106
## Processing: Soybean_point107
## Processing: Soybean_point108
## Processing: Soybean_point109
## Processing: Soybean_point110
## Processing: Soybean_point111
## Processing: Soybean_point112
## Processing: Soybean_point113
## Processing: Soybean_point114
## Processing: Soybean_point115
## Processing: Soybean_point116
## Processing: Soybean_point117
## Processing: Soybean_point118
## Processing: Soybean_point119
## Processing: Soybean_point120
## Processing: Soybean_point121
## Processing: Soybean_point122
## Processing: Soybean_point123
## Processing: Soybean_point124
## Processing: Soybean_point125
## Processing: Soybean_point126
## Processing: Soybean_point127
## Processing: Soybean_point128
## Processing: Soybean_point129
## Processing: Soybean_point130
## Processing: Soybean_point131
## Processing: Soybean_point132
## Processing: Soybean_point133
## Processing: Soybean_point134
## Processing: Soybean_point135
## Processing: Soybean_point136
## Processing: Soybean_point137
## Processing: Soybean_point138
## Processing: Soybean_point139
## Processing: Soybean_point140
## Processing: Soybean_point141
## Processing: Soybean_point142
## Processing: Soybean_point143
## Processing: Soybean_point144
## Processing: Soybean_point145
## Processing: Soybean_point146
## Processing: Soybean_point147
## Processing: Soybean_point148
## Processing: Soybean_point149
## Processing: Soybean_point150
## ✓ 150 completed
## Processing: Soybean_point151
## Processing: Soybean_point152
## Processing: Soybean_point153
## Processing: Soybean_point154
## Processing: Soybean_point155
## Processing: Soybean_point156
## Processing: Soybean_point157
## Processing: Soybean_point158
## Processing: Soybean_point159
## Processing: Soybean_point160
## Processing: Soybean_point161
## Processing: Soybean_point162
## Processing: Soybean_point163
## Processing: Soybean_point164
## Processing: Soybean_point165
## Processing: Soybean_point166
## Processing: Soybean_point167
## Processing: Soybean_point168
## Processing: Soybean_point169
## Processing: Soybean_point170
## Processing: Soybean_point171
## Processing: Soybean_point172
## Processing: Soybean_point173
## Processing: Soybean_point174
## Processing: Soybean_point175
## Processing: Soybean_point176
## Processing: Soybean_point177
## Processing: Soybean_point178
## Processing: Soybean_point179
## Processing: Soybean_point180
## Processing: Soybean_point181
## Processing: Soybean_point182
## Processing: Soybean_point183
## Processing: Soybean_point184
## Processing: Soybean_point185
## Processing: Soybean_point186
## Processing: Soybean_point187
## Processing: Soybean_point188
## Processing: Soybean_point189
## Processing: Soybean_point190
## Processing: Soybean_point191
## Processing: Soybean_point192
## Processing: Soybean_point193
## Processing: Soybean_point194
## Processing: Soybean_point195
## Processing: Soybean_point196
## Processing: Soybean_point197
## Processing: Soybean_point198
## Processing: Soybean_point199
## Processing: Soybean_point200
## ✓ 200 completed
cat("\n" , 60, "═\n")
##
## 60 ═
cat("SUMMARY: Updated", updates_made, "simulations\n")
## SUMMARY: Updated 200 simulations
cat(60, "═\n\n")
## 60 ═
if (updates_made > 0) {
cat("Writing APSIMX file...\n")
json_text <- toJSON(apsim_json, pretty = TRUE, auto_unbox = TRUE, null = "null")
writeLines(json_text, apsimx_out)
cat("✅ COMPLETE! Saved to:\n", apsimx_out, "\n")
} else {
cat("❌ NO UPDATES WERE MADE!\n")
}
## Writing APSIMX file...
## ✅ COMPLETE! Saved to:
## D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.apsimx
There is 3 output files: 1) Daily = daily recorded vairables 2) dates_sow = dates of soeing date for soyabeans 3) dates_mat = dates of maturity of the soyabeans
library(readxl)
library(ggplot2)
library(tidyr)
library(dplyr)
dates_mat <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_dates_mat.csv")
dates_sow <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_dates_sow.csv")
daily <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_daily.csv")
library(dplyr)
library(ggplot2)
library(stringr)
# Convert Clock.Today from character to Date
dates_sow <- dates_sow %>%
mutate(Clock.Today = as.Date(Clock.Today))
# Create zone mapping
dates_with_zones <- dates_sow %>%
mutate(
# Extract point number from SimulationName
point_num = as.numeric(str_extract(SimulationName, "\\d+$")),
# Assign to zone (1-20)
Zone_num = ceiling(point_num / 20),
# Convert to day of year (1-365)
DayOfYear = as.numeric(format(Clock.Today, "%j")),
Year = as.numeric(format(Clock.Today, "%Y"))
)
# Create cumulative frequency by day of year for each zone
cumulative_freq <- dates_with_zones %>%
group_by(Zone_num, DayOfYear) %>%
summarise(
count = n(),
.groups = "drop"
) %>%
arrange(Zone_num, DayOfYear) %>%
group_by(Zone_num) %>%
mutate(
cumulative_count = cumsum(count),
cumulative_percent = (cumulative_count / max(cumulative_count)) * 100
) %>%
ungroup() %>%
rename(Zone = Zone_num)
# Calculate N (total observations) for each zone
zone_n <- cumulative_freq %>%
group_by(Zone) %>%
summarise(
N = max(cumulative_count),
.groups = "drop"
) %>%
mutate(Zone_label = paste0("Zone ", Zone, " (N=", N, ")"))
# Add zone labels to data
cumulative_freq <- cumulative_freq %>%
left_join(zone_n, by = "Zone")
# Visualization 1: Faceted by zone
p1 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_count, color = factor(Zone))) +
geom_line(size = 1) +
geom_point(size = 2) +
facet_wrap(~Zone_label, ncol = 4) +
labs(
title = "Cumulative Sowing Frequency by Day of Year - By Zone",
x = "Day of Year (1-365)",
y = "Cumulative Frequency",
color = "Zone"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
strip.text = element_text(size = 10, face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
legend.position = "bottom"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(p1)
# Visualization 2: All zones together
p2 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_count, color = Zone_label, group = Zone)) +
geom_line(size = 1.2, alpha = 0.8) +
labs(
title = "Cumulative Sowing Frequency by Day of Year - All Zones",
x = "Day of Year (1-365)",
y = "Cumulative Frequency",
color = "Zone (N)"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
legend.position = "right",
legend.text = element_text(size = 9)
)
print(p2)
# Visualization 3: Percentage
p3 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_percent, color = Zone_label, group = Zone)) +
geom_line(size = 1.2, alpha = 0.8) +
labs(
title = "Cumulative Sowing Percentage by Day of Year",
x = "Day of Year (1-365)",
y = "Cumulative Percentage (%)",
color = "Zone (N)"
) +
ylim(0, 100) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold"),
legend.position = "right"
)
print(p3)
# Summary table
summary_table <- zone_n %>%
left_join(
cumulative_freq %>%
group_by(Zone) %>%
summarise(
Min_DOY = min(DayOfYear),
Max_DOY = max(DayOfYear),
DOY_Range = Max_DOY - Min_DOY,
.groups = "drop"
),
by = "Zone"
) %>%
select(Zone, N, Min_DOY, Max_DOY, DOY_Range)
cat("\n╔════════════════════════════════════════════════════════╗\n")
##
## ╔════════════════════════════════════════════════════════╗
cat("║ SUMMARY STATISTICS BY ZONE ║\n")
## ║ SUMMARY STATISTICS BY ZONE ║
cat("╚════════════════════════════════════════════════════════╝\n\n")
## ╚════════════════════════════════════════════════════════╝
print(summary_table, n = 20)
## # A tibble: 10 × 5
## Zone N Min_DOY Max_DOY DOY_Range
## <dbl> <int> <dbl> <dbl> <dbl>
## 1 1 412 22 60 38
## 2 2 420 22 60 38
## 3 3 420 22 60 38
## 4 4 420 22 60 38
## 5 5 386 22 71 49
## 6 6 420 22 60 38
## 7 7 420 22 61 39
## 8 8 420 22 61 39
## 9 9 420 22 60 38
## 10 10 408 22 64 42
year = year of season of CC grown DOY= day of the year when the CC is planted Day= Day (DD-MM) when the CC in seeded point_year= Unique ID of point - year combination
# Extract year, day of year (DOY), and month-day (Day) from Clock.Today for all three dataframes
# For dates_mat (Clock.Today is character)
dates_mat$year <- as.numeric(substr(dates_mat$Clock.Today, 1, 4))
dates_mat$Day <- substr(dates_mat$Clock.Today, 6, 10) # MM-DD format
dates_mat$DOY <- as.numeric(format(as.Date(dates_mat$Clock.Today), "%j"))
# For dates_sow (Clock.Today is Date format)
dates_sow$year <- lubridate::year(dates_sow$Clock.Today)
# Alternative without lubridate:
# dates_sow$year <- as.numeric(format(dates_sow$Clock.Today, "%Y"))
dates_sow$Day <- format(dates_sow$Clock.Today, "%m-%d")
dates_sow$DOY <- as.numeric(format(dates_sow$Clock.Today, "%j"))
# For daily (Clock.Today is character)
daily$year <- as.numeric(substr(daily$Clock.Today, 1, 4))
daily$Day <- substr(daily$Clock.Today, 6, 10) # MM-DD format
daily$DOY <- as.numeric(format(as.Date(daily$Clock.Today), "%j"))
# Verify the new columns were added correctly
str(dates_mat[, c("year", "DOY", "Day")])
## 'data.frame': 4144 obs. of 3 variables:
## $ year: num 2005 2006 2007 2008 2009 ...
## $ DOY : num 138 157 147 146 147 156 169 176 146 149 ...
## $ Day : chr "05-18" "06-06" "05-27" "05-25" ...
str(dates_sow[, c("year", "DOY", "Day")])
## 'data.frame': 4146 obs. of 3 variables:
## $ year: num 2005 2006 2007 2008 2009 ...
## $ DOY : num 27 35 22 22 22 39 50 56 31 29 ...
## $ Day : chr "01-27" "02-04" "01-22" "01-22" ...
str(daily[, c("year", "DOY", "Day")])
## 'data.frame': 1499373 obs. of 3 variables:
## $ year: num 2005 2005 2005 2005 2005 ...
## $ DOY : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Day : chr "01-01" "01-02" "01-03" "01-04" ...
# Quick check to see the values
head(dates_mat[, c("Clock.Today", "year", "DOY", "Day")], 10)
## Clock.Today year DOY Day
## 1 2005-05-18 2005 138 05-18
## 2 2006-06-06 2006 157 06-06
## 3 2007-05-27 2007 147 05-27
## 4 2008-05-25 2008 146 05-25
## 5 2009-05-27 2009 147 05-27
## 6 2010-06-05 2010 156 06-05
## 7 2011-06-18 2011 169 06-18
## 8 2012-06-24 2012 176 06-24
## 9 2013-05-26 2013 146 05-26
## 10 2014-05-29 2014 149 05-29
head(dates_sow[, c("Clock.Today", "year", "DOY", "Day")], 10)
## Clock.Today year DOY Day
## 1 2005-01-27 2005 27 01-27
## 2 2006-02-04 2006 35 02-04
## 3 2007-01-22 2007 22 01-22
## 4 2008-01-22 2008 22 01-22
## 5 2009-01-22 2009 22 01-22
## 6 2010-02-08 2010 39 02-08
## 7 2011-02-19 2011 50 02-19
## 8 2012-02-25 2012 56 02-25
## 9 2013-01-31 2013 31 01-31
## 10 2014-01-29 2014 29 01-29
head(daily[, c("Clock.Today", "year", "DOY", "Day")], 10)
## Clock.Today year DOY Day
## 1 2005-01-01 2005 1 01-01
## 2 2005-01-02 2005 2 01-02
## 3 2005-01-03 2005 3 01-03
## 4 2005-01-04 2005 4 01-04
## 5 2005-01-05 2005 5 01-05
## 6 2005-01-06 2005 6 01-06
## 7 2005-01-07 2005 7 01-07
## 8 2005-01-08 2005 8 01-08
## 9 2005-01-09 2005 9 01-09
## 10 2005-01-10 2005 10 01-10
# Now the unique ID
# Create point_season variable for all three dataframes
# Format: point_number_year (e.g., 34_2010)
# For dates_mat
dates_mat$point_season <- paste(
gsub("Soybean_point", "", dates_mat$SimulationName),
dates_mat$year,
sep = "_"
)
# For dates_sow
dates_sow$point_season <- paste(
gsub("Soybean_point", "", dates_sow$SimulationName),
dates_sow$year,
sep = "_"
)
# For daily
daily$point_season <- paste(
gsub("Soybean_point", "", daily$SimulationName),
daily$year,
sep = "_"
)
# Verify the new point_season column
head(dates_mat[, c("SimulationName", "year", "point_season")], 10)
## SimulationName year point_season
## 1 Soybean_point1 2005 1_2005
## 2 Soybean_point1 2006 1_2006
## 3 Soybean_point1 2007 1_2007
## 4 Soybean_point1 2008 1_2008
## 5 Soybean_point1 2009 1_2009
## 6 Soybean_point1 2010 1_2010
## 7 Soybean_point1 2011 1_2011
## 8 Soybean_point1 2012 1_2012
## 9 Soybean_point1 2013 1_2013
## 10 Soybean_point1 2014 1_2014
head(dates_sow[, c("SimulationName", "year", "point_season")], 10)
## SimulationName year point_season
## 1 Soybean_point1 2005 1_2005
## 2 Soybean_point1 2006 1_2006
## 3 Soybean_point1 2007 1_2007
## 4 Soybean_point1 2008 1_2008
## 5 Soybean_point1 2009 1_2009
## 6 Soybean_point1 2010 1_2010
## 7 Soybean_point1 2011 1_2011
## 8 Soybean_point1 2012 1_2012
## 9 Soybean_point1 2013 1_2013
## 10 Soybean_point1 2014 1_2014
head(daily[, c("SimulationName", "year", "point_season")], 10)
## SimulationName year point_season
## 1 Soybean_point1 2005 1_2005
## 2 Soybean_point1 2005 1_2005
## 3 Soybean_point1 2005 1_2005
## 4 Soybean_point1 2005 1_2005
## 5 Soybean_point1 2005 1_2005
## 6 Soybean_point1 2005 1_2005
## 7 Soybean_point1 2005 1_2005
## 8 Soybean_point1 2005 1_2005
## 9 Soybean_point1 2005 1_2005
## 10 Soybean_point1 2005 1_2005
# Check unique point_season values
cat("Unique point_season values in dates_mat:\n")
## Unique point_season values in dates_mat:
print(n_distinct(dates_mat$point_season))
## [1] 4144
cat("Unique point_season values in dates_sow:\n")
## Unique point_season values in dates_sow:
print(n_distinct(dates_sow$point_season))
## [1] 4146
cat("Unique point_season values in daily:\n")
## Unique point_season values in daily:
print(n_distinct(daily$point_season))
## [1] 4172
# Add ag_zone variable to daily, dates_mat, and dates_sow
# ag_zone is based on point number from point_season:
# 1-20 = zone 1, 21-40 = zone 2, ..., 181-200 = zone 20
library(dplyr)
# ===== For dates_mat =====
# Extract point number from point_season (remove the "_YYYY" part)
dates_mat$point_number <- as.numeric(gsub("_.*", "", dates_mat$point_season))
# Calculate ag_zone: ceiling(point_number / 20)
dates_mat$ag_zone <- ceiling(dates_mat$point_number / 20)
cat("=== dates_mat ag_zone ===\n")
## === dates_mat ag_zone ===
cat("Point number range:", range(dates_mat$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(dates_mat$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(dates_mat$ag_zone))
##
## 1 2 3 4 5 6 7 8 9 10
## 411 420 420 420 385 420 420 420 420 408
# ===== For dates_sow =====
# Extract point number from point_season
dates_sow$point_number <- as.numeric(gsub("_.*", "", dates_sow$point_season))
# Calculate ag_zone
dates_sow$ag_zone <- ceiling(dates_sow$point_number / 20)
cat("\n=== dates_sow ag_zone ===\n")
##
## === dates_sow ag_zone ===
cat("Point number range:", range(dates_sow$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(dates_sow$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(dates_sow$ag_zone))
##
## 1 2 3 4 5 6 7 8 9 10
## 412 420 420 420 386 420 420 420 420 408
# ===== For daily =====
# Extract point number from point_season
daily$point_number <- as.numeric(gsub("_.*", "", daily$point_season))
# Calculate ag_zone
daily$ag_zone <- ceiling(daily$point_number / 20)
cat("\n=== daily ag_zone ===\n")
##
## === daily ag_zone ===
cat("Point number range:", range(daily$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(daily$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(daily$ag_zone))
##
## 1 2 3 4 5 6 7 8 9 10
## 147977 150980 150980 150980 143556 150980 150980 150980 150980 150980
# ===== Verify with examples =====
cat("\n=== Verification Examples ===\n")
##
## === Verification Examples ===
cat("Points 1-20 should be ag_zone 1:\n")
## Points 1-20 should be ag_zone 1:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 1 & dates_mat$point_number <= 20]))
## [1] 1
cat("Points 21-40 should be ag_zone 2:\n")
## Points 21-40 should be ag_zone 2:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 21 & dates_mat$point_number <= 40]))
## [1] 2
cat("Points 181-200 should be ag_zone 20:\n")
## Points 181-200 should be ag_zone 20:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 181 & dates_mat$point_number <= 200]))
## [1] 10
cat("\nDone!\n")
##
## Done!
This is the period range when we monitor conditions to broadcast a cover crop before soya is harvested = maturity minus 30 days of the soyabeans
# Add monitoring period variables to dates_mat for cover crop seeding decision
# start_monitoring: 30 days before soybean harvest (Clock.Today - 30 days)
# end_monitoring: soybean harvest date (Clock.Today)
# First, convert Clock.Today to Date format if it isn't already
dates_mat$Clock.Today_date <- as.Date(dates_mat$Clock.Today)
# Create start_monitoring: 30 days before harvest
dates_mat$start_monitoring <- dates_mat$Clock.Today_date - 30
# Create end_monitoring: harvest date (same as Clock.Today but as Date)
dates_mat$end_monitoring <- dates_mat$Clock.Today_date
# Verify the new variables
head(dates_mat[, c("SimulationName", "year", "point_season", "Clock.Today",
"start_monitoring", "end_monitoring")], 15)
## SimulationName year point_season Clock.Today start_monitoring end_monitoring
## 1 Soybean_point1 2005 1_2005 2005-05-18 2005-04-18 2005-05-18
## 2 Soybean_point1 2006 1_2006 2006-06-06 2006-05-07 2006-06-06
## 3 Soybean_point1 2007 1_2007 2007-05-27 2007-04-27 2007-05-27
## 4 Soybean_point1 2008 1_2008 2008-05-25 2008-04-25 2008-05-25
## 5 Soybean_point1 2009 1_2009 2009-05-27 2009-04-27 2009-05-27
## 6 Soybean_point1 2010 1_2010 2010-06-05 2010-05-06 2010-06-05
## 7 Soybean_point1 2011 1_2011 2011-06-18 2011-05-19 2011-06-18
## 8 Soybean_point1 2012 1_2012 2012-06-24 2012-05-25 2012-06-24
## 9 Soybean_point1 2013 1_2013 2013-05-26 2013-04-26 2013-05-26
## 10 Soybean_point1 2014 1_2014 2014-05-29 2014-04-29 2014-05-29
## 11 Soybean_point1 2015 1_2015 2015-06-20 2015-05-21 2015-06-20
## 12 Soybean_point1 2016 1_2016 2016-05-03 2016-04-03 2016-05-03
## 13 Soybean_point1 2017 1_2017 2017-05-23 2017-04-23 2017-05-23
## 14 Soybean_point1 2018 1_2018 2018-07-05 2018-06-05 2018-07-05
## 15 Soybean_point1 2019 1_2019 2019-06-14 2019-05-15 2019-06-14
# Check the structure
str(dates_mat[, c("start_monitoring", "end_monitoring")])
## 'data.frame': 4144 obs. of 2 variables:
## $ start_monitoring: Date, format: "2005-04-18" "2006-05-07" ...
## $ end_monitoring : Date, format: "2005-05-18" "2006-06-06" ...
# Summary to confirm dates look reasonable
cat("Start monitoring range:\n")
## Start monitoring range:
print(range(dates_mat$start_monitoring, na.rm = TRUE))
## [1] "2005-04-08" "2025-06-06"
cat("End monitoring range:\n")
## End monitoring range:
print(range(dates_mat$end_monitoring, na.rm = TRUE))
## [1] "2005-05-08" "2025-07-06"
# Check the difference (should be ~30 days)
cat("Difference between end_monitoring and start_monitoring (should be ~30 days):\n")
## Difference between end_monitoring and start_monitoring (should be ~30 days):
print(summary(dates_mat$end_monitoring - dates_mat$start_monitoring))
## Length Class Mode
## 4144 difftime numeric
Recent rainfall: Rain 5 days ago > 40 mm Rain 4 days ago > 35 mm Rain 3 days ago > 30 mm Rain 2 days ago or 1 day ago or same day > 20 mm total Rain accumulated last 5 days > 50 mm Return the first day this condition is met, or “N/P” if never met
# Determine sowing_CC date for cover crop based on rainfall conditions
# SIMPLIFIED VERSION with better error handling
library(dplyr)
# Ensure dates are in Date format
dates_mat$Clock.Today_date <- as.Date(dates_mat$Clock.Today)
daily$Clock.Today_date <- as.Date(daily$Clock.Today)
# Initialize the sowing_CC column
dates_mat$sowing_CC <- NA_character_
# Create a lookup table from daily data for faster access
daily_sorted <- daily %>%
select(point_season, Clock.Today_date, Weather.Rain) %>%
arrange(point_season, Clock.Today_date)
# Get unique point_season values
unique_points <- unique(dates_mat$point_season)
# Process each unique point_season
for (ps in unique_points) {
# Get all rows for this point_season in dates_mat
mat_rows <- which(dates_mat$point_season == ps)
# Get daily data for this point_season
daily_ps <- daily_sorted %>% filter(point_season == ps)
if (nrow(daily_ps) == 0) {
# No data available
dates_mat$sowing_CC[mat_rows] <- NA
next
}
# Process each date row for this point_season
for (row_idx in mat_rows) {
start_date <- dates_mat$start_monitoring[row_idx]
end_date <- dates_mat$end_monitoring[row_idx]
# Check each day in the monitoring period
found_date <- NA_character_
for (check_date in seq(start_date, end_date, by = "day")) {
# Get rainfall values for specific days
rain_5_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 5)]
rain_4_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 4)]
rain_3_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 3)]
rain_2_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 2)]
rain_1_day_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 1)]
rain_today <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == check_date]
# Calculate 5-day accumulated rain (4 days before through today)
rain_5day_sum <- sum(
daily_ps$Weather.Rain[daily_ps$Clock.Today_date >= (check_date - 4) &
daily_ps$Clock.Today_date <= check_date],
na.rm = TRUE
)
# Check conditions
condition_met <- FALSE
if (length(rain_5_days_ago) > 0 && !is.na(rain_5_days_ago) && rain_5_days_ago > 40) condition_met <- TRUE
if (length(rain_4_days_ago) > 0 && !is.na(rain_4_days_ago) && rain_4_days_ago > 35) condition_met <- TRUE
if (length(rain_3_days_ago) > 0 && !is.na(rain_3_days_ago) && rain_3_days_ago > 30) condition_met <- TRUE
if (length(rain_2_days_ago) > 0 && !is.na(rain_2_days_ago) && rain_2_days_ago > 20) condition_met <- TRUE
if (length(rain_1_day_ago) > 0 && !is.na(rain_1_day_ago) && rain_1_day_ago > 20) condition_met <- TRUE
if (length(rain_today) > 0 && !is.na(rain_today) && rain_today > 20) condition_met <- TRUE
if (rain_5day_sum > 50) condition_met <- TRUE
if (condition_met) {
found_date <- as.character(check_date)
break
}
}
dates_mat$sowing_CC[row_idx] <- found_date
}
}
# Display results
cat("\n=== Sowing CC Results ===\n")
##
## === Sowing CC Results ===
cat("Total rows:", nrow(dates_mat), "\n")
## Total rows: 4144
cat("Rows with sowing date found:", sum(!is.na(dates_mat$sowing_CC)), "\n")
## Rows with sowing date found: 1878
cat("Rows with NO sowing date (NA):", sum(is.na(dates_mat$sowing_CC)), "\n")
## Rows with NO sowing date (NA): 2266
cat("\n=== Sample of Results ===\n")
##
## === Sample of Results ===
print(head(dates_mat[, c("point_season", "start_monitoring", "end_monitoring", "sowing_CC")], 20))
## point_season start_monitoring end_monitoring sowing_CC
## 1 1_2005 2005-04-18 2005-05-18 <NA>
## 2 1_2006 2006-05-07 2006-06-06 13281
## 3 1_2007 2007-04-27 2007-05-27 13637
## 4 1_2008 2008-04-25 2008-05-25 14020
## 5 1_2009 2009-04-27 2009-05-27 14379
## 6 1_2010 2010-05-06 2010-06-05 <NA>
## 7 1_2011 2011-05-19 2011-06-18 <NA>
## 8 1_2012 2012-05-25 2012-06-24 <NA>
## 9 1_2013 2013-04-26 2013-05-26 15826
## 10 1_2014 2014-04-29 2014-05-29 <NA>
## 11 1_2015 2015-05-21 2015-06-20 <NA>
## 12 1_2016 2016-04-03 2016-05-03 16894
## 13 1_2017 2017-04-23 2017-05-23 <NA>
## 14 1_2018 2018-06-05 2018-07-05 <NA>
## 15 1_2019 2019-05-15 2019-06-14 <NA>
## 16 1_2020 2020-04-20 2020-05-20 18372
## 17 1_2021 2021-04-24 2021-05-24 18742
## 18 1_2022 2022-04-22 2022-05-22 <NA>
## 19 1_2023 2023-05-27 2023-06-26 <NA>
## 20 1_2024 2024-04-14 2024-05-14 19845
cat("\nDone!\n")
##
## Done!
# Convert sowing_CC from numeric to Date format
# This converts the numeric representation (days since 1970-01-01) to proper dates
dates_mat$sowing_CC <- as.Date(as.numeric(dates_mat$sowing_CC), origin = "1970-01-01")
# Check that sowing_CC is always between start_monitoring and end_monitoring
# First, convert sowing_CC to Date if not already
dates_mat$sowing_CC <- as.Date(dates_mat$sowing_CC, origin = "1970-01-01")
# Create a check variable
dates_mat$sowing_in_range <- NA
# For rows where sowing_CC is not NA, check if it's in range
dates_mat$sowing_in_range[!is.na(dates_mat$sowing_CC)] <-
(dates_mat$sowing_CC[!is.na(dates_mat$sowing_CC)] >= dates_mat$start_monitoring[!is.na(dates_mat$sowing_CC)]) &
(dates_mat$sowing_CC[!is.na(dates_mat$sowing_CC)] <= dates_mat$end_monitoring[!is.na(dates_mat$sowing_CC)])
# Summary
cat("=== Sowing Date Range Check ===\n")
## === Sowing Date Range Check ===
cat("Total rows:", nrow(dates_mat), "\n")
## Total rows: 4144
cat("Rows with sowing_CC (not NA):", sum(!is.na(dates_mat$sowing_CC)), "\n")
## Rows with sowing_CC (not NA): 1878
cat("Rows with sowing_CC IN range:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_CC IN range: 1878
cat("Rows with sowing_CC OUT of range:", sum(!dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_CC OUT of range: 0
# Show any rows OUT of range
if (sum(!dates_mat$sowing_in_range, na.rm = TRUE) > 0) {
cat("\n=== ROWS OUT OF RANGE ===\n")
out_of_range <- dates_mat[dates_mat$sowing_in_range == FALSE,
c("point_season", "start_monitoring", "sowing_CC", "end_monitoring")]
print(out_of_range)
} else {
cat("\n✓ All sowing dates are within the monitoring period!\n")
}
##
## ✓ All sowing dates are within the monitoring period!
The approach: Determine the date when it will emerge (sowing +5 days) For that date determine available water in the 0-50 soil layer Then cacluate how much rain from that daye for the next 2 month Then add the available water + the rain to estimate CC_available water
# Extract soil water (PAW) at time of cover crop sowing
# PAW_50_sowCC = Soil.Water.PAWmm.1. value on the day of CC sowing
library(dplyr)
# Step 1: Calculate DOY for sowing_CC in dates_mat
dates_mat$sowing_DOY <- as.numeric(format(dates_mat$sowing_CC, "%j"))
# Step 2: Initialize PAW_50_sowCC variable
dates_mat$PAW_50_sowCC <- NA_real_
# Step 3: For each row where sowing_in_range = TRUE, find matching daily data
for (i in 1:nrow(dates_mat)) {
# Only process rows where CC was sown (sowing_in_range = TRUE)
if (!is.na(dates_mat$sowing_in_range[i]) && dates_mat$sowing_in_range[i] == TRUE) {
# Get point_season and sowing_DOY for this row
ps <- dates_mat$point_season[i]
doy <- dates_mat$sowing_DOY[i]
# Find matching row in daily data
match_row <- daily %>%
filter(point_season == ps & DOY == doy) %>%
select(Soil.Water.PAWmm.1.)
# Extract the value if found
if (nrow(match_row) > 0) {
dates_mat$PAW_50_sowCC[i] <- match_row$Soil.Water.PAWmm.1.[1] # Take first match if multiple
}
}
}
# Summary
cat("=== PAW_50_sowCC Results ===\n")
## === PAW_50_sowCC Results ===
cat("Rows with sowing_in_range = TRUE:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_in_range = TRUE: 1878
cat("Rows with PAW_50_sowCC value found:", sum(!is.na(dates_mat$PAW_50_sowCC)), "\n")
## Rows with PAW_50_sowCC value found: 1878
cat("Rows with PAW_50_sowCC missing:", sum(is.na(dates_mat$PAW_50_sowCC[dates_mat$sowing_in_range == TRUE]), na.rm = TRUE), "\n")
## Rows with PAW_50_sowCC missing: 2266
cat("\nBasic statistics of PAW_50_sowCC:\n")
##
## Basic statistics of PAW_50_sowCC:
print(summary(dates_mat$PAW_50_sowCC))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.666 8.915 9.630 9.621 10.258 12.642 2266
cat("\n=== Sample Results ===\n")
##
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE,
c("point_season", "sowing_CC", "sowing_DOY", "PAW_50_sowCC")], 15))
## point_season sowing_CC sowing_DOY PAW_50_sowCC
## NA <NA> <NA> NA NA
## 2 1_2006 2006-05-13 133 9.789
## 3 1_2007 2007-05-04 124 9.651
## 4 1_2008 2008-05-21 142 9.454
## 5 1_2009 2009-05-15 135 9.648
## NA.1 <NA> <NA> NA NA
## NA.2 <NA> <NA> NA NA
## NA.3 <NA> <NA> NA NA
## 9 1_2013 2013-05-01 121 9.726
## NA.4 <NA> <NA> NA NA
## NA.5 <NA> <NA> NA NA
## 12 1_2016 2016-04-03 94 9.881
## NA.6 <NA> <NA> NA NA
## NA.7 <NA> <NA> NA NA
## NA.8 <NA> <NA> NA NA
# Calculate total rainfall from CC sowing to 60 days after
# rain_CC = sum of Weather.Rain from sowing_CC to sowing_CC + 60 days
library(dplyr)
# Initialize rain_CC variable
dates_mat$rain_CC <- NA_real_
# For each row where sowing_in_range = TRUE, sum rainfall for 60 days after sowing
for (i in 1:nrow(dates_mat)) {
# Only process rows where CC was sown (sowing_in_range = TRUE)
if (!is.na(dates_mat$sowing_in_range[i]) && dates_mat$sowing_in_range[i] == TRUE) {
# Get point_season and sowing date
ps <- dates_mat$point_season[i]
sowing_date <- dates_mat$sowing_CC[i]
end_date <- sowing_date + 60 # 60 days after sowing
# Sum rainfall from sowing date to 60 days after
rain_sum <- daily %>%
filter(point_season == ps &
Clock.Today_date >= sowing_date &
Clock.Today_date <= end_date) %>%
pull(Weather.Rain) %>%
sum(na.rm = TRUE)
# Store the sum
dates_mat$rain_CC[i] <- rain_sum
}
}
# Summary
cat("=== rain_CC Results ===\n")
## === rain_CC Results ===
cat("Rows with sowing_in_range = TRUE:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_in_range = TRUE: 1878
cat("Rows with rain_CC value found:", sum(!is.na(dates_mat$rain_CC)), "\n")
## Rows with rain_CC value found: 1878
cat("\nBasic statistics of rain_CC (mm):\n")
##
## Basic statistics of rain_CC (mm):
print(summary(dates_mat$rain_CC))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 26.7 105.3 162.2 191.8 259.3 511.8 2266
cat("\n=== Sample Results ===\n")
##
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE,
c("point_season", "sowing_CC", "PAW_50_sowCC", "rain_CC")], 15))
## point_season sowing_CC PAW_50_sowCC rain_CC
## NA <NA> <NA> NA NA
## 2 1_2006 2006-05-13 9.789 52.8
## 3 1_2007 2007-05-04 9.651 364.2
## 4 1_2008 2008-05-21 9.454 405.8
## 5 1_2009 2009-05-15 9.648 88.1
## NA.1 <NA> <NA> NA NA
## NA.2 <NA> <NA> NA NA
## NA.3 <NA> <NA> NA NA
## 9 1_2013 2013-05-01 9.726 136.3
## NA.4 <NA> <NA> NA NA
## NA.5 <NA> <NA> NA NA
## 12 1_2016 2016-04-03 9.881 271.6
## NA.6 <NA> <NA> NA NA
## NA.7 <NA> <NA> NA NA
## NA.8 <NA> <NA> NA NA
cat("\nDone!\n")
##
## Done!
# Calculate total water available for cover crop
# Total_CCAW_CC = PAW_50_sowCC + rain_CC
dates_mat$Total_CCAW_CC <- dates_mat$PAW_50_sowCC + dates_mat$rain_CC
# Summary
cat("=== Total_CCAW_CC Results ===\n")
## === Total_CCAW_CC Results ===
cat("Rows with Total_CCAW_CC value:", sum(!is.na(dates_mat$Total_CCAW_CC)), "\n")
## Rows with Total_CCAW_CC value: 1878
cat("\nBasic statistics of Total_CCAW_CC (mm):\n")
##
## Basic statistics of Total_CCAW_CC (mm):
print(summary(dates_mat$Total_CCAW_CC))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 35.01 113.70 171.76 201.42 269.69 522.51 2266
cat("\n=== Sample Results ===\n")
##
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE,
c("point_season", "sowing_CC", "PAW_50_sowCC", "rain_CC", "Total_CCAW_CC")], 15))
## point_season sowing_CC PAW_50_sowCC rain_CC Total_CCAW_CC
## NA <NA> <NA> NA NA NA
## 2 1_2006 2006-05-13 9.789 52.8 62.589
## 3 1_2007 2007-05-04 9.651 364.2 373.851
## 4 1_2008 2008-05-21 9.454 405.8 415.254
## 5 1_2009 2009-05-15 9.648 88.1 97.748
## NA.1 <NA> <NA> NA NA NA
## NA.2 <NA> <NA> NA NA NA
## NA.3 <NA> <NA> NA NA NA
## 9 1_2013 2013-05-01 9.726 136.3 146.026
## NA.4 <NA> <NA> NA NA NA
## NA.5 <NA> <NA> NA NA NA
## 12 1_2016 2016-04-03 9.881 271.6 281.481
## NA.6 <NA> <NA> NA NA NA
## NA.7 <NA> <NA> NA NA NA
## NA.8 <NA> <NA> NA NA NA
cat("\nDone!\n")
##
## Done!
#— 3. Figures ## CUmmulative freequescy of total AW for the CC
# Create cumulative frequency plot for Total_CCAW_CC by ag_zone and point_number
# Filter for sowing_in_range = TRUE only
library(ggplot2)
library(dplyr)
# Filter data: only rows where sowing_in_range = TRUE and Total_CCAW_CC is not NA
data_plot <- dates_mat %>%
filter(sowing_in_range == TRUE & !is.na(Total_CCAW_CC)) %>%
select(ag_zone, point_number, Total_CCAW_CC)
# For each ag_zone and point_number, calculate cumulative frequency
data_cumfreq <- data_plot %>%
group_by(ag_zone, point_number) %>%
arrange(Total_CCAW_CC) %>%
mutate(
rank = row_number(),
n = n(),
cumfreq = rank / n # Cumulative frequency (0 to 1)
) %>%
ungroup()
# Calculate overall cumulative frequency for each ag_zone (combining ALL points)
overall_cumfreq <- data_plot %>%
group_by(ag_zone) %>%
arrange(Total_CCAW_CC) %>%
mutate(
rank = row_number(),
n_total = n(),
overall_cumfreq = rank / n_total
) %>%
ungroup() %>%
select(ag_zone, Total_CCAW_CC, overall_cumfreq)
# Calculate statistics for each ag_zone
stats_by_zone <- dates_mat %>%
filter(!is.na(ag_zone)) %>%
group_by(ag_zone) %>%
summarise(
n_points = n_distinct(point_number),
pct_sowing = round(sum(sowing_in_range == TRUE, na.rm = TRUE) / n() * 100, 1)
)
# Create the plot with individual point lines + average black line
plot_cumfreq <- ggplot() +
# Individual point lines (colored)
geom_line(data = data_cumfreq, aes(x = Total_CCAW_CC, y = cumfreq, color = factor(point_number)),
size = 0.6, alpha = 0.6) +
# Overall cumulative line for all points combined (black, thicker)
geom_line(data = overall_cumfreq, aes(x = Total_CCAW_CC, y = overall_cumfreq),
color = "black", size = 1.2) +
facet_wrap(~ag_zone, nrow = 5, labeller = labeller(ag_zone = c(
"1" = "ag_zone 1", "2" = "ag_zone 2", "3" = "ag_zone 3", "4" = "ag_zone 4",
"5" = "ag_zone 5", "6" = "ag_zone 6", "7" = "ag_zone 7", "8" = "ag_zone 8",
"9" = "ag_zone 9", "10" = "ag_zone 10", "11" = "ag_zone 11", "12" = "ag_zone 12",
"13" = "ag_zone 13", "14" = "ag_zone 14", "15" = "ag_zone 15", "16" = "ag_zone 16",
"17" = "ag_zone 17", "18" = "ag_zone 18", "19" = "ag_zone 19", "20" = "ag_zone 20"
))) +
geom_text(
data = stats_by_zone,
aes(x = Inf, y = 0.05, label = paste("N =", n_points, "\n", pct_sowing, "% sowing")),
hjust = 1.1, vjust = 0, size = 3.5, color = "black", family = "monospace"
) +
labs(
title = "Cumulative Frequency of Total Water Available for Cover Crop (Total_CCAW_CC)",
subtitle = "Colored lines: individual points | Black line: cumulative for all data points combined in zone",
x = "Total_CCAW_CC (mm)",
y = "Cumulative Frequency"
) +
theme_minimal() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 11, face = "italic"),
legend.position = "none"
)
# Display the plot
print(plot_cumfreq)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
# Summary statistics
cat("=== Summary by ag_zone ===\n")
## === Summary by ag_zone ===
print(stats_by_zone)
## # A tibble: 10 × 3
## ag_zone n_points pct_sowing
## <dbl> <int> <dbl>
## 1 1 20 45.7
## 2 2 20 46.2
## 3 3 20 48.6
## 4 4 20 47.4
## 5 5 19 50.4
## 6 6 20 42.9
## 7 7 20 45.2
## 8 8 20 46.4
## 9 9 20 28.6
## 10 10 20 52.5
cat("\nTotal rows with sowing_in_range = TRUE:", nrow(data_plot), "\n")
##
## Total rows with sowing_in_range = TRUE: 1878
cat("Total unique points:", n_distinct(data_plot$point_number), "\n")
## Total unique points: 199
# Save the plot
ggsave("cumulative_frequency_CCAW.png", plot_cumfreq, width = 16, height = 12, dpi = 300)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
cat("\nPlot saved as: cumulative_frequency_CCAW.png\n")
##
## Plot saved as: cumulative_frequency_CCAW.png