library(jsonlite)
library(readr)
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

— 1. APSIM simulations

We multiply to 200 the based model of soya in season B

soya_seasonB_CCseasonC.apsimx located in apsimx_runs folder. This is the base I need to modify and then is multiplied to 200 points for soil and weather

library(jsonlite)

# Load existing APSIMX JSON file
apsim_path <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC.apsimx"
apsim_json <- fromJSON(apsim_path, simplifyVector = FALSE)

# Collect existing simulation names
existing_names <- sapply(apsim_json$Children, function(x) x$Name)

# Use the first simulation as the base template
template <- apsim_json$Children[[which(existing_names == "Soybean_point1")]]

# Create and append new simulations (only if names don't exist)
for (i in 2:200) {
  new_name <- paste0("Soybean_point", i)
  
  # Skip if this name already exists
  if (new_name %in% existing_names) next
  
  new_sim <- template
  new_sim$Name <- new_name
  
  # Update weather file path
  for (child in new_sim$Children) {
    if (!is.null(child$`$type`) && child$`$type` == "Models.Climate.Weather, Models") {
      child$FileName <- paste0("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/weather/point", i, ".met")
    }
  }
  
  # Append to APSIM JSON structure
  apsim_json$Children <- append(apsim_json$Children, list(new_sim))
}

# Write to new .apsimx file
output_file <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC_1.apsimx"
write(toJSON(apsim_json, auto_unbox = TRUE, pretty = TRUE, null = "null"), output_file)

Point-specific soil and weather

for each to have the specific weather and soils data from the site

library(jsonlite)
library(stringr)
library(dplyr)

# File paths
apsimx_file <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_CCseasonC_1.apsimx"
apsimx_out  <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.apsimx"
par_dir     <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/soils_par"
weather_dir <- "D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/weather"

# Update soil node - CORRECTED to use lists
update_soil_from_par <- function(existing_soil, par_path) {
  if (is.null(existing_soil) || is.null(existing_soil$Children)) {
    warning("existing_soil is NULL")
    return(existing_soil)
  }
  
  if (!file.exists(par_path)) {
    cat("   ❌ PAR FILE NOT FOUND:", par_path, "\n")
    return(existing_soil)
  }
  
  lines <- readLines(par_path)
  data <- lines[!str_starts(lines, "\\*") & str_detect(lines, "=")]
  
  if (length(data) == 0) {
    cat("   ❌ NO PARAMETERS FOUND!\n")
    return(existing_soil)
  }
  
  data <- str_split_fixed(data, "=", 2)
  df <- as.data.frame(data, stringsAsFactors = FALSE)
  colnames(df) <- c("key", "value")
  df <- df %>% mutate(key = tolower(trimws(key)), value = trimws(value))
  
  get_val <- function(k) df$value[df$key == tolower(k)]
  
  # CRITICAL FIX: Convert to LIST, not vector
  parse_num_vec <- function(k) {
    if (tolower(k) %in% df$key) {
      vals <- as.numeric(unlist(str_split(get_val(k), ",")))
      return(as.list(vals))  # ← RETURNS LIST!
    } else {
      return(NULL)
    }
  }

  # Find Physical
  physical <- NULL
  for (child in existing_soil$Children) {
    if (!is.null(child$Name) && child$Name == "Physical") {
      physical <- child
      break
    }
  }
  
  if (!is.null(physical)) {
    if (!is.null(v <- parse_num_vec("dlayer")))  physical$Thickness <- v
    if (!is.null(v <- parse_num_vec("bd")))      physical$BD <- v
    if (!is.null(v <- parse_num_vec("airdry")))  physical$AirDry <- v
    if (!is.null(v <- parse_num_vec("ll15")))    physical$LL15 <- v
    if (!is.null(v <- parse_num_vec("dul")))     physical$DUL  <- v
    if (!is.null(v <- parse_num_vec("sat")))     physical$SAT  <- v
    if (!is.null(v <- parse_num_vec("ks")))      physical$KS   <- v
    
    sand <- parse_num_vec("sand")
    clay <- parse_num_vec("clay")
    if (!is.null(sand)) physical$ParticleSizeSand <- sand
    if (!is.null(clay)) physical$ParticleSizeClay <- clay
    if (!is.null(sand) && !is.null(clay)) {
      silt_vals <- as.list(100 - unlist(sand) - unlist(clay))
      silt_vals <- lapply(silt_vals, function(x) max(x, 0))
      physical$ParticleSizeSilt <- silt_vals
    }
    
    # Update Physical back
    for (idx in seq_along(existing_soil$Children)) {
      if (!is.null(existing_soil$Children[[idx]]$Name) && 
          existing_soil$Children[[idx]]$Name == "Physical") {
        existing_soil$Children[[idx]] <- physical
        break
      }
    }
  }

  # Find and update Organic
  organic <- NULL
  for (child in existing_soil$Children) {
    if (!is.null(child$Name) && child$Name == "Organic") {
      organic <- child
      break
    }
  }
  
  if (!is.null(organic)) {
    if (!is.null(v <- parse_num_vec("carbon"))) {
      organic$Carbon <- v
      organic$Thickness <- parse_num_vec("dlayer")
      for (idx in seq_along(existing_soil$Children)) {
        if (!is.null(existing_soil$Children[[idx]]$Name) && 
            existing_soil$Children[[idx]]$Name == "Organic") {
          existing_soil$Children[[idx]] <- organic
          break
        }
      }
    }
  }

  # Find and update Chemical
  chemical <- NULL
  for (child in existing_soil$Children) {
    if (!is.null(child$Name) && child$Name == "Chemical") {
      chemical <- child
      break
    }
  }
  
  if (!is.null(chemical)) {
    if (!is.null(v <- parse_num_vec("ph")))  chemical$PH <- v
    if (!is.null(v <- parse_num_vec("cec"))) chemical$CEC <- v
    if (!is.null(v <- parse_num_vec("ec")))  chemical$EC <- v
    if (!is.null(v <- parse_num_vec("dlayer"))) chemical$Thickness <- v
    for (idx in seq_along(existing_soil$Children)) {
      if (!is.null(existing_soil$Children[[idx]]$Name) && 
          existing_soil$Children[[idx]]$Name == "Chemical") {
        existing_soil$Children[[idx]] <- chemical
        break
      }
    }
  }

  # Update metadata
  if (!is.null(v <- get_val("latitude")))  existing_soil$Latitude <- as.numeric(v)
  if (!is.null(v <- get_val("longitude"))) existing_soil$Longitude <- as.numeric(v)
  if (!is.null(v <- get_val("site")))      existing_soil$Site     <- v
  if (!is.null(v <- get_val("country")))   existing_soil$Country  <- v

  return(existing_soil)
}

# Load APSIMX
cat("Loading APSIMX file...\n")
## Loading APSIMX file...
apsim_json <- fromJSON(apsimx_file, simplifyVector = FALSE)
cat("✓ Loaded", length(apsim_json$Children), "children\n\n")
## ✓ Loaded 201 children
# Update all simulations
updates_made <- 0
for (i in 2:length(apsim_json$Children)) {
  sim <- apsim_json$Children[[i]]
  
  if (is.null(sim$`$type`) || sim$`$type` != "Models.Core.Simulation, Models") {
    next
  }
  
  sim_name <- sim$Name
  point_id <- str_extract(sim_name, "point\\d+")
  
  if (is.na(point_id)) {
    cat("❌ Could not extract point ID from:", sim_name, "\n")
    next
  }
  
  cat("Processing:", sim_name, "\n")
  
  par_file <- file.path(par_dir, paste0(point_id, ".par"))
  weather_file <- file.path(weather_dir, paste0(point_id, ".met"))

  # Update Weather
  for (j in seq_along(sim$Children)) {
    if (!is.null(sim$Children[[j]]$`$type`) && 
        sim$Children[[j]]$`$type` == "Models.Climate.Weather, Models") {
      sim$Children[[j]]$FileName <- weather_file
    }
  }

  # Update Soil
  soil_updated <- FALSE
  for (j in seq_along(sim$Children)) {
    if (!is.null(sim$Children[[j]]$Name) && sim$Children[[j]]$Name == "Field") {
      field <- sim$Children[[j]]
      
      for (k in seq_along(field$Children)) {
        if (!is.null(field$Children[[k]]$Name) && field$Children[[k]]$Name == "Soil") {
          existing_soil <- field$Children[[k]]
          updated_soil <- update_soil_from_par(existing_soil, par_file)
          field$Children[[k]] <- updated_soil
          soil_updated <- TRUE
          
          sim$Children[[j]] <- field
          break
        }
      }
      
      if (soil_updated) break
    }
  }
  
  apsim_json$Children[[i]] <- sim
  updates_made <- updates_made + 1
  
  if (updates_made %% 50 == 0) cat("  ✓", updates_made, "completed\n")
}
## Processing: Soybean_point1 
## Processing: Soybean_point2 
## Processing: Soybean_point3 
## Processing: Soybean_point4 
## Processing: Soybean_point5 
## Processing: Soybean_point6 
## Processing: Soybean_point7 
## Processing: Soybean_point8 
## Processing: Soybean_point9 
## Processing: Soybean_point10 
## Processing: Soybean_point11 
## Processing: Soybean_point12 
## Processing: Soybean_point13 
## Processing: Soybean_point14 
## Processing: Soybean_point15 
## Processing: Soybean_point16 
## Processing: Soybean_point17 
## Processing: Soybean_point18 
## Processing: Soybean_point19 
## Processing: Soybean_point20 
## Processing: Soybean_point21 
## Processing: Soybean_point22 
## Processing: Soybean_point23 
## Processing: Soybean_point24 
## Processing: Soybean_point25 
## Processing: Soybean_point26 
## Processing: Soybean_point27 
## Processing: Soybean_point28 
## Processing: Soybean_point29 
## Processing: Soybean_point30 
## Processing: Soybean_point31 
## Processing: Soybean_point32 
## Processing: Soybean_point33 
## Processing: Soybean_point34 
## Processing: Soybean_point35 
## Processing: Soybean_point36 
## Processing: Soybean_point37 
## Processing: Soybean_point38 
## Processing: Soybean_point39 
## Processing: Soybean_point40 
## Processing: Soybean_point41 
## Processing: Soybean_point42 
## Processing: Soybean_point43 
## Processing: Soybean_point44 
## Processing: Soybean_point45 
## Processing: Soybean_point46 
## Processing: Soybean_point47 
## Processing: Soybean_point48 
## Processing: Soybean_point49 
## Processing: Soybean_point50 
##   ✓ 50 completed
## Processing: Soybean_point51 
## Processing: Soybean_point52 
## Processing: Soybean_point53 
## Processing: Soybean_point54 
## Processing: Soybean_point55 
## Processing: Soybean_point56 
## Processing: Soybean_point57 
## Processing: Soybean_point58 
## Processing: Soybean_point59 
## Processing: Soybean_point60 
## Processing: Soybean_point61 
## Processing: Soybean_point62 
## Processing: Soybean_point63 
## Processing: Soybean_point64 
## Processing: Soybean_point65 
## Processing: Soybean_point66 
## Processing: Soybean_point67 
## Processing: Soybean_point68 
## Processing: Soybean_point69 
## Processing: Soybean_point70 
## Processing: Soybean_point71 
## Processing: Soybean_point72 
## Processing: Soybean_point73 
## Processing: Soybean_point74 
## Processing: Soybean_point75 
## Processing: Soybean_point76 
## Processing: Soybean_point77 
## Processing: Soybean_point78 
## Processing: Soybean_point79 
## Processing: Soybean_point80 
## Processing: Soybean_point81 
## Processing: Soybean_point82 
## Processing: Soybean_point83 
## Processing: Soybean_point84 
## Processing: Soybean_point85 
## Processing: Soybean_point86 
## Processing: Soybean_point87 
## Processing: Soybean_point88 
## Processing: Soybean_point89 
## Processing: Soybean_point90 
## Processing: Soybean_point91 
## Processing: Soybean_point92 
## Processing: Soybean_point93 
## Processing: Soybean_point94 
## Processing: Soybean_point95 
## Processing: Soybean_point96 
## Processing: Soybean_point97 
## Processing: Soybean_point98 
## Processing: Soybean_point99 
## Processing: Soybean_point100 
##   ✓ 100 completed
## Processing: Soybean_point101 
## Processing: Soybean_point102 
## Processing: Soybean_point103 
## Processing: Soybean_point104 
## Processing: Soybean_point105 
## Processing: Soybean_point106 
## Processing: Soybean_point107 
## Processing: Soybean_point108 
## Processing: Soybean_point109 
## Processing: Soybean_point110 
## Processing: Soybean_point111 
## Processing: Soybean_point112 
## Processing: Soybean_point113 
## Processing: Soybean_point114 
## Processing: Soybean_point115 
## Processing: Soybean_point116 
## Processing: Soybean_point117 
## Processing: Soybean_point118 
## Processing: Soybean_point119 
## Processing: Soybean_point120 
## Processing: Soybean_point121 
## Processing: Soybean_point122 
## Processing: Soybean_point123 
## Processing: Soybean_point124 
## Processing: Soybean_point125 
## Processing: Soybean_point126 
## Processing: Soybean_point127 
## Processing: Soybean_point128 
## Processing: Soybean_point129 
## Processing: Soybean_point130 
## Processing: Soybean_point131 
## Processing: Soybean_point132 
## Processing: Soybean_point133 
## Processing: Soybean_point134 
## Processing: Soybean_point135 
## Processing: Soybean_point136 
## Processing: Soybean_point137 
## Processing: Soybean_point138 
## Processing: Soybean_point139 
## Processing: Soybean_point140 
## Processing: Soybean_point141 
## Processing: Soybean_point142 
## Processing: Soybean_point143 
## Processing: Soybean_point144 
## Processing: Soybean_point145 
## Processing: Soybean_point146 
## Processing: Soybean_point147 
## Processing: Soybean_point148 
## Processing: Soybean_point149 
## Processing: Soybean_point150 
##   ✓ 150 completed
## Processing: Soybean_point151 
## Processing: Soybean_point152 
## Processing: Soybean_point153 
## Processing: Soybean_point154 
## Processing: Soybean_point155 
## Processing: Soybean_point156 
## Processing: Soybean_point157 
## Processing: Soybean_point158 
## Processing: Soybean_point159 
## Processing: Soybean_point160 
## Processing: Soybean_point161 
## Processing: Soybean_point162 
## Processing: Soybean_point163 
## Processing: Soybean_point164 
## Processing: Soybean_point165 
## Processing: Soybean_point166 
## Processing: Soybean_point167 
## Processing: Soybean_point168 
## Processing: Soybean_point169 
## Processing: Soybean_point170 
## Processing: Soybean_point171 
## Processing: Soybean_point172 
## Processing: Soybean_point173 
## Processing: Soybean_point174 
## Processing: Soybean_point175 
## Processing: Soybean_point176 
## Processing: Soybean_point177 
## Processing: Soybean_point178 
## Processing: Soybean_point179 
## Processing: Soybean_point180 
## Processing: Soybean_point181 
## Processing: Soybean_point182 
## Processing: Soybean_point183 
## Processing: Soybean_point184 
## Processing: Soybean_point185 
## Processing: Soybean_point186 
## Processing: Soybean_point187 
## Processing: Soybean_point188 
## Processing: Soybean_point189 
## Processing: Soybean_point190 
## Processing: Soybean_point191 
## Processing: Soybean_point192 
## Processing: Soybean_point193 
## Processing: Soybean_point194 
## Processing: Soybean_point195 
## Processing: Soybean_point196 
## Processing: Soybean_point197 
## Processing: Soybean_point198 
## Processing: Soybean_point199 
## Processing: Soybean_point200 
##   ✓ 200 completed
cat("\n" , 60, "═\n")
## 
##  60 ═
cat("SUMMARY: Updated", updates_made, "simulations\n")
## SUMMARY: Updated 200 simulations
cat(60, "═\n\n")
## 60 ═
if (updates_made > 0) {
  cat("Writing APSIMX file...\n")
  json_text <- toJSON(apsim_json, pretty = TRUE, auto_unbox = TRUE, null = "null")
  writeLines(json_text, apsimx_out)
  cat("✅ COMPLETE! Saved to:\n", apsimx_out, "\n")
} else {
  cat("❌ NO UPDATES WERE MADE!\n")
}
## Writing APSIMX file...
## ✅ COMPLETE! Saved to:
##  D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.apsimx

— 2. Analysis of the outputs from APSIM—-

Upload files ouput files (3)

There is 3 output files: 1) Daily = daily recorded vairables 2) dates_sow = dates of soeing date for soyabeans 3) dates_mat = dates of maturity of the soyabeans

library(readxl)
library(ggplot2)
library(tidyr)
library(dplyr)

dates_mat <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_dates_mat.csv")

dates_sow <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_dates_sow.csv")

daily <- read.csv("D:/Mes Donnees/R/Biomass_Crops/Modeling/APSIM/apsimx_runs/soya_seasonB_FINAL.Report_daily.csv")

Planting dates of the soya per ag_zone

library(dplyr)
library(ggplot2)
library(stringr)

# Convert Clock.Today from character to Date
dates_sow <- dates_sow %>%
  mutate(Clock.Today = as.Date(Clock.Today))

# Create zone mapping
dates_with_zones <- dates_sow %>%
  mutate(
    # Extract point number from SimulationName
    point_num = as.numeric(str_extract(SimulationName, "\\d+$")),
    # Assign to zone (1-20)
    Zone_num = ceiling(point_num / 20),
    # Convert to day of year (1-365)
    DayOfYear = as.numeric(format(Clock.Today, "%j")),
    Year = as.numeric(format(Clock.Today, "%Y"))
  )

# Create cumulative frequency by day of year for each zone
cumulative_freq <- dates_with_zones %>%
  group_by(Zone_num, DayOfYear) %>%
  summarise(
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(Zone_num, DayOfYear) %>%
  group_by(Zone_num) %>%
  mutate(
    cumulative_count = cumsum(count),
    cumulative_percent = (cumulative_count / max(cumulative_count)) * 100
  ) %>%
  ungroup() %>%
  rename(Zone = Zone_num)

# Calculate N (total observations) for each zone
zone_n <- cumulative_freq %>%
  group_by(Zone) %>%
  summarise(
    N = max(cumulative_count),
    .groups = "drop"
  ) %>%
  mutate(Zone_label = paste0("Zone ", Zone, " (N=", N, ")"))

# Add zone labels to data
cumulative_freq <- cumulative_freq %>%
  left_join(zone_n, by = "Zone")

# Visualization 1: Faceted by zone
p1 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_count, color = factor(Zone))) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  facet_wrap(~Zone_label, ncol = 4) +
  labs(
    title = "Cumulative Sowing Frequency by Day of Year - By Zone",
    x = "Day of Year (1-365)",
    y = "Cumulative Frequency",
    color = "Zone"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    strip.text = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = "bottom"
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(p1)

# Visualization 2: All zones together
p2 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_count, color = Zone_label, group = Zone)) +
  geom_line(size = 1.2, alpha = 0.8) +
  labs(
    title = "Cumulative Sowing Frequency by Day of Year - All Zones",
    x = "Day of Year (1-365)",
    y = "Cumulative Frequency",
    color = "Zone (N)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    legend.position = "right",
    legend.text = element_text(size = 9)
  )

print(p2)

# Visualization 3: Percentage
p3 <- ggplot(cumulative_freq, aes(x = DayOfYear, y = cumulative_percent, color = Zone_label, group = Zone)) +
  geom_line(size = 1.2, alpha = 0.8) +
  labs(
    title = "Cumulative Sowing Percentage by Day of Year",
    x = "Day of Year (1-365)",
    y = "Cumulative Percentage (%)",
    color = "Zone (N)"
  ) +
  ylim(0, 100) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    legend.position = "right"
  )

print(p3)

# Summary table
summary_table <- zone_n %>%
  left_join(
    cumulative_freq %>%
      group_by(Zone) %>%
      summarise(
        Min_DOY = min(DayOfYear),
        Max_DOY = max(DayOfYear),
        DOY_Range = Max_DOY - Min_DOY,
        .groups = "drop"
      ),
    by = "Zone"
  ) %>%
  select(Zone, N, Min_DOY, Max_DOY, DOY_Range)

cat("\n╔════════════════════════════════════════════════════════╗\n")
## 
## ╔════════════════════════════════════════════════════════╗
cat("║         SUMMARY STATISTICS BY ZONE                     ║\n")
## ║         SUMMARY STATISTICS BY ZONE                     ║
cat("╚════════════════════════════════════════════════════════╝\n\n")
## ╚════════════════════════════════════════════════════════╝
print(summary_table, n = 20)
## # A tibble: 10 × 5
##     Zone     N Min_DOY Max_DOY DOY_Range
##    <dbl> <int>   <dbl>   <dbl>     <dbl>
##  1     1   412      22      60        38
##  2     2   420      22      60        38
##  3     3   420      22      60        38
##  4     4   420      22      60        38
##  5     5   386      22      71        49
##  6     6   420      22      60        38
##  7     7   420      22      61        39
##  8     8   420      22      61        39
##  9     9   420      22      60        38
## 10    10   408      22      64        42

Add a variables “year”, “DOY”, “Day”, “point_year”

year = year of season of CC grown DOY= day of the year when the CC is planted Day= Day (DD-MM) when the CC in seeded point_year= Unique ID of point - year combination

# Extract year, day of year (DOY), and month-day (Day) from Clock.Today for all three dataframes

# For dates_mat (Clock.Today is character)
dates_mat$year <- as.numeric(substr(dates_mat$Clock.Today, 1, 4))
dates_mat$Day <- substr(dates_mat$Clock.Today, 6, 10)  # MM-DD format
dates_mat$DOY <- as.numeric(format(as.Date(dates_mat$Clock.Today), "%j"))

# For dates_sow (Clock.Today is Date format)
dates_sow$year <- lubridate::year(dates_sow$Clock.Today)
# Alternative without lubridate:
# dates_sow$year <- as.numeric(format(dates_sow$Clock.Today, "%Y"))
dates_sow$Day <- format(dates_sow$Clock.Today, "%m-%d")
dates_sow$DOY <- as.numeric(format(dates_sow$Clock.Today, "%j"))

# For daily (Clock.Today is character)
daily$year <- as.numeric(substr(daily$Clock.Today, 1, 4))
daily$Day <- substr(daily$Clock.Today, 6, 10)  # MM-DD format
daily$DOY <- as.numeric(format(as.Date(daily$Clock.Today), "%j"))

# Verify the new columns were added correctly
str(dates_mat[, c("year", "DOY", "Day")])
## 'data.frame':    4144 obs. of  3 variables:
##  $ year: num  2005 2006 2007 2008 2009 ...
##  $ DOY : num  138 157 147 146 147 156 169 176 146 149 ...
##  $ Day : chr  "05-18" "06-06" "05-27" "05-25" ...
str(dates_sow[, c("year", "DOY", "Day")])
## 'data.frame':    4146 obs. of  3 variables:
##  $ year: num  2005 2006 2007 2008 2009 ...
##  $ DOY : num  27 35 22 22 22 39 50 56 31 29 ...
##  $ Day : chr  "01-27" "02-04" "01-22" "01-22" ...
str(daily[, c("year", "DOY", "Day")])
## 'data.frame':    1499373 obs. of  3 variables:
##  $ year: num  2005 2005 2005 2005 2005 ...
##  $ DOY : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Day : chr  "01-01" "01-02" "01-03" "01-04" ...
# Quick check to see the values
head(dates_mat[, c("Clock.Today", "year", "DOY", "Day")], 10)
##    Clock.Today year DOY   Day
## 1   2005-05-18 2005 138 05-18
## 2   2006-06-06 2006 157 06-06
## 3   2007-05-27 2007 147 05-27
## 4   2008-05-25 2008 146 05-25
## 5   2009-05-27 2009 147 05-27
## 6   2010-06-05 2010 156 06-05
## 7   2011-06-18 2011 169 06-18
## 8   2012-06-24 2012 176 06-24
## 9   2013-05-26 2013 146 05-26
## 10  2014-05-29 2014 149 05-29
head(dates_sow[, c("Clock.Today", "year", "DOY", "Day")], 10)
##    Clock.Today year DOY   Day
## 1   2005-01-27 2005  27 01-27
## 2   2006-02-04 2006  35 02-04
## 3   2007-01-22 2007  22 01-22
## 4   2008-01-22 2008  22 01-22
## 5   2009-01-22 2009  22 01-22
## 6   2010-02-08 2010  39 02-08
## 7   2011-02-19 2011  50 02-19
## 8   2012-02-25 2012  56 02-25
## 9   2013-01-31 2013  31 01-31
## 10  2014-01-29 2014  29 01-29
head(daily[, c("Clock.Today", "year", "DOY", "Day")], 10)
##    Clock.Today year DOY   Day
## 1   2005-01-01 2005   1 01-01
## 2   2005-01-02 2005   2 01-02
## 3   2005-01-03 2005   3 01-03
## 4   2005-01-04 2005   4 01-04
## 5   2005-01-05 2005   5 01-05
## 6   2005-01-06 2005   6 01-06
## 7   2005-01-07 2005   7 01-07
## 8   2005-01-08 2005   8 01-08
## 9   2005-01-09 2005   9 01-09
## 10  2005-01-10 2005  10 01-10
# Now the unique ID

# Create point_season variable for all three dataframes
# Format: point_number_year (e.g., 34_2010)

# For dates_mat
dates_mat$point_season <- paste(
  gsub("Soybean_point", "", dates_mat$SimulationName),
  dates_mat$year,
  sep = "_"
)

# For dates_sow
dates_sow$point_season <- paste(
  gsub("Soybean_point", "", dates_sow$SimulationName),
  dates_sow$year,
  sep = "_"
)

# For daily
daily$point_season <- paste(
  gsub("Soybean_point", "", daily$SimulationName),
  daily$year,
  sep = "_"
)

# Verify the new point_season column
head(dates_mat[, c("SimulationName", "year", "point_season")], 10)
##    SimulationName year point_season
## 1  Soybean_point1 2005       1_2005
## 2  Soybean_point1 2006       1_2006
## 3  Soybean_point1 2007       1_2007
## 4  Soybean_point1 2008       1_2008
## 5  Soybean_point1 2009       1_2009
## 6  Soybean_point1 2010       1_2010
## 7  Soybean_point1 2011       1_2011
## 8  Soybean_point1 2012       1_2012
## 9  Soybean_point1 2013       1_2013
## 10 Soybean_point1 2014       1_2014
head(dates_sow[, c("SimulationName", "year", "point_season")], 10)
##    SimulationName year point_season
## 1  Soybean_point1 2005       1_2005
## 2  Soybean_point1 2006       1_2006
## 3  Soybean_point1 2007       1_2007
## 4  Soybean_point1 2008       1_2008
## 5  Soybean_point1 2009       1_2009
## 6  Soybean_point1 2010       1_2010
## 7  Soybean_point1 2011       1_2011
## 8  Soybean_point1 2012       1_2012
## 9  Soybean_point1 2013       1_2013
## 10 Soybean_point1 2014       1_2014
head(daily[, c("SimulationName", "year", "point_season")], 10)
##    SimulationName year point_season
## 1  Soybean_point1 2005       1_2005
## 2  Soybean_point1 2005       1_2005
## 3  Soybean_point1 2005       1_2005
## 4  Soybean_point1 2005       1_2005
## 5  Soybean_point1 2005       1_2005
## 6  Soybean_point1 2005       1_2005
## 7  Soybean_point1 2005       1_2005
## 8  Soybean_point1 2005       1_2005
## 9  Soybean_point1 2005       1_2005
## 10 Soybean_point1 2005       1_2005
# Check unique point_season values
cat("Unique point_season values in dates_mat:\n")
## Unique point_season values in dates_mat:
print(n_distinct(dates_mat$point_season))
## [1] 4144
cat("Unique point_season values in dates_sow:\n")
## Unique point_season values in dates_sow:
print(n_distinct(dates_sow$point_season))
## [1] 4146
cat("Unique point_season values in daily:\n")
## Unique point_season values in daily:
print(n_distinct(daily$point_season))
## [1] 4172

Add zone (1 to 10)

# Add ag_zone variable to daily, dates_mat, and dates_sow
# ag_zone is based on point number from point_season:
# 1-20 = zone 1, 21-40 = zone 2, ..., 181-200 = zone 20

library(dplyr)

# ===== For dates_mat =====
# Extract point number from point_season (remove the "_YYYY" part)
dates_mat$point_number <- as.numeric(gsub("_.*", "", dates_mat$point_season))

# Calculate ag_zone: ceiling(point_number / 20)
dates_mat$ag_zone <- ceiling(dates_mat$point_number / 20)

cat("=== dates_mat ag_zone ===\n")
## === dates_mat ag_zone ===
cat("Point number range:", range(dates_mat$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(dates_mat$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(dates_mat$ag_zone))
## 
##   1   2   3   4   5   6   7   8   9  10 
## 411 420 420 420 385 420 420 420 420 408
# ===== For dates_sow =====
# Extract point number from point_season
dates_sow$point_number <- as.numeric(gsub("_.*", "", dates_sow$point_season))

# Calculate ag_zone
dates_sow$ag_zone <- ceiling(dates_sow$point_number / 20)

cat("\n=== dates_sow ag_zone ===\n")
## 
## === dates_sow ag_zone ===
cat("Point number range:", range(dates_sow$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(dates_sow$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(dates_sow$ag_zone))
## 
##   1   2   3   4   5   6   7   8   9  10 
## 412 420 420 420 386 420 420 420 420 408
# ===== For daily =====
# Extract point number from point_season
daily$point_number <- as.numeric(gsub("_.*", "", daily$point_season))

# Calculate ag_zone
daily$ag_zone <- ceiling(daily$point_number / 20)

cat("\n=== daily ag_zone ===\n")
## 
## === daily ag_zone ===
cat("Point number range:", range(daily$point_number, na.rm = TRUE), "\n")
## Point number range: 1 200
cat("ag_zone range:", range(daily$ag_zone, na.rm = TRUE), "\n")
## ag_zone range: 1 10
print(table(daily$ag_zone))
## 
##      1      2      3      4      5      6      7      8      9     10 
## 147977 150980 150980 150980 143556 150980 150980 150980 150980 150980
# ===== Verify with examples =====
cat("\n=== Verification Examples ===\n")
## 
## === Verification Examples ===
cat("Points 1-20 should be ag_zone 1:\n")
## Points 1-20 should be ag_zone 1:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 1 & dates_mat$point_number <= 20]))
## [1] 1
cat("Points 21-40 should be ag_zone 2:\n")
## Points 21-40 should be ag_zone 2:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 21 & dates_mat$point_number <= 40]))
## [1] 2
cat("Points 181-200 should be ag_zone 20:\n")
## Points 181-200 should be ag_zone 20:
print(unique(dates_mat$ag_zone[dates_mat$point_number >= 181 & dates_mat$point_number <= 200]))
## [1] 10
cat("\nDone!\n")
## 
## Done!

Sowing rule for crotolaria broadcasted as cc

Add the range of dates to seed CC

This is the period range when we monitor conditions to broadcast a cover crop before soya is harvested = maturity minus 30 days of the soyabeans

# Add monitoring period variables to dates_mat for cover crop seeding decision
# start_monitoring: 30 days before soybean harvest (Clock.Today - 30 days)
# end_monitoring: soybean harvest date (Clock.Today)

# First, convert Clock.Today to Date format if it isn't already
dates_mat$Clock.Today_date <- as.Date(dates_mat$Clock.Today)

# Create start_monitoring: 30 days before harvest
dates_mat$start_monitoring <- dates_mat$Clock.Today_date - 30

# Create end_monitoring: harvest date (same as Clock.Today but as Date)
dates_mat$end_monitoring <- dates_mat$Clock.Today_date

# Verify the new variables
head(dates_mat[, c("SimulationName", "year", "point_season", "Clock.Today", 
                   "start_monitoring", "end_monitoring")], 15)
##    SimulationName year point_season Clock.Today start_monitoring end_monitoring
## 1  Soybean_point1 2005       1_2005  2005-05-18       2005-04-18     2005-05-18
## 2  Soybean_point1 2006       1_2006  2006-06-06       2006-05-07     2006-06-06
## 3  Soybean_point1 2007       1_2007  2007-05-27       2007-04-27     2007-05-27
## 4  Soybean_point1 2008       1_2008  2008-05-25       2008-04-25     2008-05-25
## 5  Soybean_point1 2009       1_2009  2009-05-27       2009-04-27     2009-05-27
## 6  Soybean_point1 2010       1_2010  2010-06-05       2010-05-06     2010-06-05
## 7  Soybean_point1 2011       1_2011  2011-06-18       2011-05-19     2011-06-18
## 8  Soybean_point1 2012       1_2012  2012-06-24       2012-05-25     2012-06-24
## 9  Soybean_point1 2013       1_2013  2013-05-26       2013-04-26     2013-05-26
## 10 Soybean_point1 2014       1_2014  2014-05-29       2014-04-29     2014-05-29
## 11 Soybean_point1 2015       1_2015  2015-06-20       2015-05-21     2015-06-20
## 12 Soybean_point1 2016       1_2016  2016-05-03       2016-04-03     2016-05-03
## 13 Soybean_point1 2017       1_2017  2017-05-23       2017-04-23     2017-05-23
## 14 Soybean_point1 2018       1_2018  2018-07-05       2018-06-05     2018-07-05
## 15 Soybean_point1 2019       1_2019  2019-06-14       2019-05-15     2019-06-14
# Check the structure
str(dates_mat[, c("start_monitoring", "end_monitoring")])
## 'data.frame':    4144 obs. of  2 variables:
##  $ start_monitoring: Date, format: "2005-04-18" "2006-05-07" ...
##  $ end_monitoring  : Date, format: "2005-05-18" "2006-06-06" ...
# Summary to confirm dates look reasonable
cat("Start monitoring range:\n")
## Start monitoring range:
print(range(dates_mat$start_monitoring, na.rm = TRUE))
## [1] "2005-04-08" "2025-06-06"
cat("End monitoring range:\n")
## End monitoring range:
print(range(dates_mat$end_monitoring, na.rm = TRUE))
## [1] "2005-05-08" "2025-07-06"
# Check the difference (should be ~30 days)
cat("Difference between end_monitoring and start_monitoring (should be ~30 days):\n")
## Difference between end_monitoring and start_monitoring (should be ~30 days):
print(summary(dates_mat$end_monitoring - dates_mat$start_monitoring))
##   Length    Class     Mode 
##     4144 difftime  numeric

Add sowing date of CC based on condition:

Recent rainfall: Rain 5 days ago > 40 mm Rain 4 days ago > 35 mm Rain 3 days ago > 30 mm Rain 2 days ago or 1 day ago or same day > 20 mm total Rain accumulated last 5 days > 50 mm Return the first day this condition is met, or “N/P” if never met

# Determine sowing_CC date for cover crop based on rainfall conditions
# SIMPLIFIED VERSION with better error handling

library(dplyr)

# Ensure dates are in Date format
dates_mat$Clock.Today_date <- as.Date(dates_mat$Clock.Today)
daily$Clock.Today_date <- as.Date(daily$Clock.Today)

# Initialize the sowing_CC column
dates_mat$sowing_CC <- NA_character_

# Create a lookup table from daily data for faster access
daily_sorted <- daily %>%
  select(point_season, Clock.Today_date, Weather.Rain) %>%
  arrange(point_season, Clock.Today_date)

# Get unique point_season values
unique_points <- unique(dates_mat$point_season)

# Process each unique point_season
for (ps in unique_points) {
  
  # Get all rows for this point_season in dates_mat
  mat_rows <- which(dates_mat$point_season == ps)
  
  # Get daily data for this point_season
  daily_ps <- daily_sorted %>% filter(point_season == ps)
  
  if (nrow(daily_ps) == 0) {
    # No data available
    dates_mat$sowing_CC[mat_rows] <- NA
    next
  }
  
  # Process each date row for this point_season
  for (row_idx in mat_rows) {
    
    start_date <- dates_mat$start_monitoring[row_idx]
    end_date <- dates_mat$end_monitoring[row_idx]
    
    # Check each day in the monitoring period
    found_date <- NA_character_
    
    for (check_date in seq(start_date, end_date, by = "day")) {
      
      # Get rainfall values for specific days
      rain_5_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 5)]
      rain_4_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 4)]
      rain_3_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 3)]
      rain_2_days_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 2)]
      rain_1_day_ago <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == (check_date - 1)]
      rain_today <- daily_ps$Weather.Rain[daily_ps$Clock.Today_date == check_date]
      
      # Calculate 5-day accumulated rain (4 days before through today)
      rain_5day_sum <- sum(
        daily_ps$Weather.Rain[daily_ps$Clock.Today_date >= (check_date - 4) & 
                              daily_ps$Clock.Today_date <= check_date],
        na.rm = TRUE
      )
      
      # Check conditions
      condition_met <- FALSE
      
      if (length(rain_5_days_ago) > 0 && !is.na(rain_5_days_ago) && rain_5_days_ago > 40) condition_met <- TRUE
      if (length(rain_4_days_ago) > 0 && !is.na(rain_4_days_ago) && rain_4_days_ago > 35) condition_met <- TRUE
      if (length(rain_3_days_ago) > 0 && !is.na(rain_3_days_ago) && rain_3_days_ago > 30) condition_met <- TRUE
      if (length(rain_2_days_ago) > 0 && !is.na(rain_2_days_ago) && rain_2_days_ago > 20) condition_met <- TRUE
      if (length(rain_1_day_ago) > 0 && !is.na(rain_1_day_ago) && rain_1_day_ago > 20) condition_met <- TRUE
      if (length(rain_today) > 0 && !is.na(rain_today) && rain_today > 20) condition_met <- TRUE
      if (rain_5day_sum > 50) condition_met <- TRUE
      
      if (condition_met) {
        found_date <- as.character(check_date)
        break
      }
    }
    
    dates_mat$sowing_CC[row_idx] <- found_date
  }
}

# Display results
cat("\n=== Sowing CC Results ===\n")
## 
## === Sowing CC Results ===
cat("Total rows:", nrow(dates_mat), "\n")
## Total rows: 4144
cat("Rows with sowing date found:", sum(!is.na(dates_mat$sowing_CC)), "\n")
## Rows with sowing date found: 1878
cat("Rows with NO sowing date (NA):", sum(is.na(dates_mat$sowing_CC)), "\n")
## Rows with NO sowing date (NA): 2266
cat("\n=== Sample of Results ===\n")
## 
## === Sample of Results ===
print(head(dates_mat[, c("point_season", "start_monitoring", "end_monitoring", "sowing_CC")], 20))
##    point_season start_monitoring end_monitoring sowing_CC
## 1        1_2005       2005-04-18     2005-05-18      <NA>
## 2        1_2006       2006-05-07     2006-06-06     13281
## 3        1_2007       2007-04-27     2007-05-27     13637
## 4        1_2008       2008-04-25     2008-05-25     14020
## 5        1_2009       2009-04-27     2009-05-27     14379
## 6        1_2010       2010-05-06     2010-06-05      <NA>
## 7        1_2011       2011-05-19     2011-06-18      <NA>
## 8        1_2012       2012-05-25     2012-06-24      <NA>
## 9        1_2013       2013-04-26     2013-05-26     15826
## 10       1_2014       2014-04-29     2014-05-29      <NA>
## 11       1_2015       2015-05-21     2015-06-20      <NA>
## 12       1_2016       2016-04-03     2016-05-03     16894
## 13       1_2017       2017-04-23     2017-05-23      <NA>
## 14       1_2018       2018-06-05     2018-07-05      <NA>
## 15       1_2019       2019-05-15     2019-06-14      <NA>
## 16       1_2020       2020-04-20     2020-05-20     18372
## 17       1_2021       2021-04-24     2021-05-24     18742
## 18       1_2022       2022-04-22     2022-05-22      <NA>
## 19       1_2023       2023-05-27     2023-06-26      <NA>
## 20       1_2024       2024-04-14     2024-05-14     19845
cat("\nDone!\n")
## 
## Done!
# Convert sowing_CC from numeric to Date format
# This converts the numeric representation (days since 1970-01-01) to proper dates

dates_mat$sowing_CC <- as.Date(as.numeric(dates_mat$sowing_CC), origin = "1970-01-01")

Check that the dates for sowing CC are within the monitoring range

# Check that sowing_CC is always between start_monitoring and end_monitoring

# First, convert sowing_CC to Date if not already
dates_mat$sowing_CC <- as.Date(dates_mat$sowing_CC, origin = "1970-01-01")

# Create a check variable
dates_mat$sowing_in_range <- NA

# For rows where sowing_CC is not NA, check if it's in range
dates_mat$sowing_in_range[!is.na(dates_mat$sowing_CC)] <- 
  (dates_mat$sowing_CC[!is.na(dates_mat$sowing_CC)] >= dates_mat$start_monitoring[!is.na(dates_mat$sowing_CC)]) &
  (dates_mat$sowing_CC[!is.na(dates_mat$sowing_CC)] <= dates_mat$end_monitoring[!is.na(dates_mat$sowing_CC)])

# Summary
cat("=== Sowing Date Range Check ===\n")
## === Sowing Date Range Check ===
cat("Total rows:", nrow(dates_mat), "\n")
## Total rows: 4144
cat("Rows with sowing_CC (not NA):", sum(!is.na(dates_mat$sowing_CC)), "\n")
## Rows with sowing_CC (not NA): 1878
cat("Rows with sowing_CC IN range:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_CC IN range: 1878
cat("Rows with sowing_CC OUT of range:", sum(!dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_CC OUT of range: 0
# Show any rows OUT of range
if (sum(!dates_mat$sowing_in_range, na.rm = TRUE) > 0) {
  cat("\n=== ROWS OUT OF RANGE ===\n")
  out_of_range <- dates_mat[dates_mat$sowing_in_range == FALSE, 
                            c("point_season", "start_monitoring", "sowing_CC", "end_monitoring")]
  print(out_of_range)
} else {
  cat("\n✓ All sowing dates are within the monitoring period!\n")
}
## 
## ✓ All sowing dates are within the monitoring period!

Calculate how much available water there is and will be for the CC

The approach: Determine the date when it will emerge (sowing +5 days) For that date determine available water in the 0-50 soil layer Then cacluate how much rain from that daye for the next 2 month Then add the available water + the rain to estimate CC_available water

First we add the variable of water available 0-50 the day of CC sowing

# Extract soil water (PAW) at time of cover crop sowing
# PAW_50_sowCC = Soil.Water.PAWmm.1. value on the day of CC sowing

library(dplyr)

# Step 1: Calculate DOY for sowing_CC in dates_mat
dates_mat$sowing_DOY <- as.numeric(format(dates_mat$sowing_CC, "%j"))

# Step 2: Initialize PAW_50_sowCC variable
dates_mat$PAW_50_sowCC <- NA_real_

# Step 3: For each row where sowing_in_range = TRUE, find matching daily data
for (i in 1:nrow(dates_mat)) {
  
  # Only process rows where CC was sown (sowing_in_range = TRUE)
  if (!is.na(dates_mat$sowing_in_range[i]) && dates_mat$sowing_in_range[i] == TRUE) {
    
    # Get point_season and sowing_DOY for this row
    ps <- dates_mat$point_season[i]
    doy <- dates_mat$sowing_DOY[i]
    
    # Find matching row in daily data
    match_row <- daily %>%
      filter(point_season == ps & DOY == doy) %>%
      select(Soil.Water.PAWmm.1.)
    
    # Extract the value if found
    if (nrow(match_row) > 0) {
      dates_mat$PAW_50_sowCC[i] <- match_row$Soil.Water.PAWmm.1.[1]  # Take first match if multiple
    }
  }
}

# Summary
cat("=== PAW_50_sowCC Results ===\n")
## === PAW_50_sowCC Results ===
cat("Rows with sowing_in_range = TRUE:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_in_range = TRUE: 1878
cat("Rows with PAW_50_sowCC value found:", sum(!is.na(dates_mat$PAW_50_sowCC)), "\n")
## Rows with PAW_50_sowCC value found: 1878
cat("Rows with PAW_50_sowCC missing:", sum(is.na(dates_mat$PAW_50_sowCC[dates_mat$sowing_in_range == TRUE]), na.rm = TRUE), "\n")
## Rows with PAW_50_sowCC missing: 2266
cat("\nBasic statistics of PAW_50_sowCC:\n")
## 
## Basic statistics of PAW_50_sowCC:
print(summary(dates_mat$PAW_50_sowCC))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   4.666   8.915   9.630   9.621  10.258  12.642    2266
cat("\n=== Sample Results ===\n")
## 
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE, 
                     c("point_season", "sowing_CC", "sowing_DOY", "PAW_50_sowCC")], 15))
##      point_season  sowing_CC sowing_DOY PAW_50_sowCC
## NA           <NA>       <NA>         NA           NA
## 2          1_2006 2006-05-13        133        9.789
## 3          1_2007 2007-05-04        124        9.651
## 4          1_2008 2008-05-21        142        9.454
## 5          1_2009 2009-05-15        135        9.648
## NA.1         <NA>       <NA>         NA           NA
## NA.2         <NA>       <NA>         NA           NA
## NA.3         <NA>       <NA>         NA           NA
## 9          1_2013 2013-05-01        121        9.726
## NA.4         <NA>       <NA>         NA           NA
## NA.5         <NA>       <NA>         NA           NA
## 12         1_2016 2016-04-03         94        9.881
## NA.6         <NA>       <NA>         NA           NA
## NA.7         <NA>       <NA>         NA           NA
## NA.8         <NA>       <NA>         NA           NA

Then calculate the accumulated rains from sowing the CC to 60 days after

# Calculate total rainfall from CC sowing to 60 days after
# rain_CC = sum of Weather.Rain from sowing_CC to sowing_CC + 60 days

library(dplyr)

# Initialize rain_CC variable
dates_mat$rain_CC <- NA_real_

# For each row where sowing_in_range = TRUE, sum rainfall for 60 days after sowing
for (i in 1:nrow(dates_mat)) {
  
  # Only process rows where CC was sown (sowing_in_range = TRUE)
  if (!is.na(dates_mat$sowing_in_range[i]) && dates_mat$sowing_in_range[i] == TRUE) {
    
    # Get point_season and sowing date
    ps <- dates_mat$point_season[i]
    sowing_date <- dates_mat$sowing_CC[i]
    end_date <- sowing_date + 60  # 60 days after sowing
    
    # Sum rainfall from sowing date to 60 days after
    rain_sum <- daily %>%
      filter(point_season == ps & 
             Clock.Today_date >= sowing_date & 
             Clock.Today_date <= end_date) %>%
      pull(Weather.Rain) %>%
      sum(na.rm = TRUE)
    
    # Store the sum
    dates_mat$rain_CC[i] <- rain_sum
  }
}

# Summary
cat("=== rain_CC Results ===\n")
## === rain_CC Results ===
cat("Rows with sowing_in_range = TRUE:", sum(dates_mat$sowing_in_range, na.rm = TRUE), "\n")
## Rows with sowing_in_range = TRUE: 1878
cat("Rows with rain_CC value found:", sum(!is.na(dates_mat$rain_CC)), "\n")
## Rows with rain_CC value found: 1878
cat("\nBasic statistics of rain_CC (mm):\n")
## 
## Basic statistics of rain_CC (mm):
print(summary(dates_mat$rain_CC))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    26.7   105.3   162.2   191.8   259.3   511.8    2266
cat("\n=== Sample Results ===\n")
## 
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE, 
                     c("point_season", "sowing_CC", "PAW_50_sowCC", "rain_CC")], 15))
##      point_season  sowing_CC PAW_50_sowCC rain_CC
## NA           <NA>       <NA>           NA      NA
## 2          1_2006 2006-05-13        9.789    52.8
## 3          1_2007 2007-05-04        9.651   364.2
## 4          1_2008 2008-05-21        9.454   405.8
## 5          1_2009 2009-05-15        9.648    88.1
## NA.1         <NA>       <NA>           NA      NA
## NA.2         <NA>       <NA>           NA      NA
## NA.3         <NA>       <NA>           NA      NA
## 9          1_2013 2013-05-01        9.726   136.3
## NA.4         <NA>       <NA>           NA      NA
## NA.5         <NA>       <NA>           NA      NA
## 12         1_2016 2016-04-03        9.881   271.6
## NA.6         <NA>       <NA>           NA      NA
## NA.7         <NA>       <NA>           NA      NA
## NA.8         <NA>       <NA>           NA      NA
cat("\nDone!\n")
## 
## Done!

Finally add the H2O stored at planting of the CC + rains in the folowwing 60D

# Calculate total water available for cover crop
# Total_CCAW_CC = PAW_50_sowCC + rain_CC

dates_mat$Total_CCAW_CC <- dates_mat$PAW_50_sowCC + dates_mat$rain_CC

# Summary
cat("=== Total_CCAW_CC Results ===\n")
## === Total_CCAW_CC Results ===
cat("Rows with Total_CCAW_CC value:", sum(!is.na(dates_mat$Total_CCAW_CC)), "\n")
## Rows with Total_CCAW_CC value: 1878
cat("\nBasic statistics of Total_CCAW_CC (mm):\n")
## 
## Basic statistics of Total_CCAW_CC (mm):
print(summary(dates_mat$Total_CCAW_CC))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   35.01  113.70  171.76  201.42  269.69  522.51    2266
cat("\n=== Sample Results ===\n")
## 
## === Sample Results ===
print(head(dates_mat[dates_mat$sowing_in_range == TRUE, 
                     c("point_season", "sowing_CC", "PAW_50_sowCC", "rain_CC", "Total_CCAW_CC")], 15))
##      point_season  sowing_CC PAW_50_sowCC rain_CC Total_CCAW_CC
## NA           <NA>       <NA>           NA      NA            NA
## 2          1_2006 2006-05-13        9.789    52.8        62.589
## 3          1_2007 2007-05-04        9.651   364.2       373.851
## 4          1_2008 2008-05-21        9.454   405.8       415.254
## 5          1_2009 2009-05-15        9.648    88.1        97.748
## NA.1         <NA>       <NA>           NA      NA            NA
## NA.2         <NA>       <NA>           NA      NA            NA
## NA.3         <NA>       <NA>           NA      NA            NA
## 9          1_2013 2013-05-01        9.726   136.3       146.026
## NA.4         <NA>       <NA>           NA      NA            NA
## NA.5         <NA>       <NA>           NA      NA            NA
## 12         1_2016 2016-04-03        9.881   271.6       281.481
## NA.6         <NA>       <NA>           NA      NA            NA
## NA.7         <NA>       <NA>           NA      NA            NA
## NA.8         <NA>       <NA>           NA      NA            NA
cat("\nDone!\n")
## 
## Done!

#— 3. Figures ## CUmmulative freequescy of total AW for the CC

# Create cumulative frequency plot for Total_CCAW_CC by ag_zone and point_number
# Filter for sowing_in_range = TRUE only

library(ggplot2)
library(dplyr)

# Filter data: only rows where sowing_in_range = TRUE and Total_CCAW_CC is not NA
data_plot <- dates_mat %>%
  filter(sowing_in_range == TRUE & !is.na(Total_CCAW_CC)) %>%
  select(ag_zone, point_number, Total_CCAW_CC)

# For each ag_zone and point_number, calculate cumulative frequency
data_cumfreq <- data_plot %>%
  group_by(ag_zone, point_number) %>%
  arrange(Total_CCAW_CC) %>%
  mutate(
    rank = row_number(),
    n = n(),
    cumfreq = rank / n  # Cumulative frequency (0 to 1)
  ) %>%
  ungroup()

# Calculate overall cumulative frequency for each ag_zone (combining ALL points)
overall_cumfreq <- data_plot %>%
  group_by(ag_zone) %>%
  arrange(Total_CCAW_CC) %>%
  mutate(
    rank = row_number(),
    n_total = n(),
    overall_cumfreq = rank / n_total
  ) %>%
  ungroup() %>%
  select(ag_zone, Total_CCAW_CC, overall_cumfreq)

# Calculate statistics for each ag_zone
stats_by_zone <- dates_mat %>%
  filter(!is.na(ag_zone)) %>%
  group_by(ag_zone) %>%
  summarise(
    n_points = n_distinct(point_number),
    pct_sowing = round(sum(sowing_in_range == TRUE, na.rm = TRUE) / n() * 100, 1)
  )

# Create the plot with individual point lines + average black line
plot_cumfreq <- ggplot() +
  # Individual point lines (colored)
  geom_line(data = data_cumfreq, aes(x = Total_CCAW_CC, y = cumfreq, color = factor(point_number)), 
            size = 0.6, alpha = 0.6) +
  # Overall cumulative line for all points combined (black, thicker)
  geom_line(data = overall_cumfreq, aes(x = Total_CCAW_CC, y = overall_cumfreq), 
            color = "black", size = 1.2) +
  facet_wrap(~ag_zone, nrow = 5, labeller = labeller(ag_zone = c(
    "1" = "ag_zone 1", "2" = "ag_zone 2", "3" = "ag_zone 3", "4" = "ag_zone 4", 
    "5" = "ag_zone 5", "6" = "ag_zone 6", "7" = "ag_zone 7", "8" = "ag_zone 8",
    "9" = "ag_zone 9", "10" = "ag_zone 10", "11" = "ag_zone 11", "12" = "ag_zone 12",
    "13" = "ag_zone 13", "14" = "ag_zone 14", "15" = "ag_zone 15", "16" = "ag_zone 16",
    "17" = "ag_zone 17", "18" = "ag_zone 18", "19" = "ag_zone 19", "20" = "ag_zone 20"
  ))) +
  geom_text(
    data = stats_by_zone,
    aes(x = Inf, y = 0.05, label = paste("N =", n_points, "\n", pct_sowing, "% sowing")),
    hjust = 1.1, vjust = 0, size = 3.5, color = "black", family = "monospace"
  ) +
  labs(
    title = "Cumulative Frequency of Total Water Available for Cover Crop (Total_CCAW_CC)",
    subtitle = "Colored lines: individual points | Black line: cumulative for all data points combined in zone",
    x = "Total_CCAW_CC (mm)",
    y = "Cumulative Frequency"
  ) +
  theme_minimal() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 11, face = "italic"),
    legend.position = "none"
  )

# Display the plot
print(plot_cumfreq)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

# Summary statistics
cat("=== Summary by ag_zone ===\n")
## === Summary by ag_zone ===
print(stats_by_zone)
## # A tibble: 10 × 3
##    ag_zone n_points pct_sowing
##      <dbl>    <int>      <dbl>
##  1       1       20       45.7
##  2       2       20       46.2
##  3       3       20       48.6
##  4       4       20       47.4
##  5       5       19       50.4
##  6       6       20       42.9
##  7       7       20       45.2
##  8       8       20       46.4
##  9       9       20       28.6
## 10      10       20       52.5
cat("\nTotal rows with sowing_in_range = TRUE:", nrow(data_plot), "\n")
## 
## Total rows with sowing_in_range = TRUE: 1878
cat("Total unique points:", n_distinct(data_plot$point_number), "\n")
## Total unique points: 199
# Save the plot
ggsave("cumulative_frequency_CCAW.png", plot_cumfreq, width = 16, height = 12, dpi = 300)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
cat("\nPlot saved as: cumulative_frequency_CCAW.png\n")
## 
## Plot saved as: cumulative_frequency_CCAW.png