library(tidyverse)
library(lubridate)
library(ggrepel)
Function Definitions
calculate_corn_gdd <- function(tmax, tmin) {
  capped_tmax <- pmin(tmax, 30)
  
  daily_gdd <- ((capped_tmax + tmin) / 2) - 10
  
  pmax(daily_gdd, 0)
}
read_nasa_temp <- function(filepath) {
  data <- read_csv(
    filepath,
    skip = 9,
    col_types = cols(
      YEAR = col_integer(),
      MO = col_integer(),
      DY = col_integer(),
      HR = col_integer(),
      T2M = col_double()
    )
  )
  
  data <- data %>%
    mutate(
      date = make_date(YEAR, MO, DY),
      datetime = make_datetime(YEAR, MO, DY, HR)
    ) %>%
    filter(T2M != -999)
  
  return(data)
}
calculate_daily_gdd_from_hourly <- function(hourly_data) {
  daily_temps <- hourly_data %>%
    group_by(date) %>%
    summarise(
      tmax = max(T2M, na.rm = TRUE),
      tmin = min(T2M, na.rm = TRUE),
      .groups = "drop"
    )
  
  daily_temps <- daily_temps %>%
    mutate(gdd = calculate_corn_gdd(tmax, tmin))
  
  return(daily_temps)
}
 
Define Environments
environments <- tibble(
  environment = c("PSU2025", "CLY2025", "PSU2022"),
  planting_date = mdy(c("5/20/2025", "4/04/2025", "5/26/2022")),
  temp_file = c(
    "~/Desktop/POWER_PSU2025.csv",
    "~/Desktop/POWER_CLY2025.csv",
    "~/Desktop/POWER_PSU2022.csv"
  ) #,
  # pheno_file = c(
  #   "PSU2025_spatially_corrected_phenotypes.csv",
  #   "CLY2025_spatially_corrected_phenotypes.csv",
  #   "PSU2022_spatially_corrected_phenotypes.csv"
  # )
)
 
Process Temperature Data
gdd_data <- environments %>%
  mutate(
    temp_data = map(temp_file, read_nasa_temp),
    daily_gdd = map(temp_data, calculate_daily_gdd_from_hourly)
  ) %>%
  select(environment, planting_date, daily_gdd) %>%
  unnest(daily_gdd)
hourly_temp_100_dap <- environments %>%
  mutate(temp_data = map(temp_file, read_nasa_temp)) %>%
  select(environment, planting_date, temp_data) %>%
  unnest(temp_data) %>%
  mutate(days_after_planting = as.integer(date - planting_date)) %>%
  filter(days_after_planting >= 0 & days_after_planting <= 100)
write_csv(hourly_temp_100_dap, "hourly_temp_100_DAP.csv")
cat("Exported hourly_temp_100_DAP.csv with", nrow(hourly_temp_100_dap), "rows\n")
## Exported hourly_temp_100_DAP.csv with 7272 rows
gdd_data <- gdd_data %>%
  mutate(
    days_after_planting = as.integer(date - planting_date),
    julian_day = yday(date)
  ) %>%
  filter(days_after_planting >= 0 &  days_after_planting <= 100) %>%
  group_by(environment) %>%
  mutate(cumulative_gdd_from_planting = cumsum(gdd)) %>%
  ungroup()
 
Prepare GDD Lookup for Phenology Data
gdd_at_phenology <- gdd_data %>%
  select(environment, date, cumulative_gdd_from_planting)
cat("GDD lookup table prepared with", nrow(gdd_at_phenology), "rows\n")
## GDD lookup table prepared with 303 rows
cat("Environments:", paste(unique(gdd_at_phenology$environment), collapse = ", "), "\n")
## Environments: PSU2025, CLY2025, PSU2022
 
Summary Statistics
gdd_summary <- gdd_data %>%
  group_by(environment) %>%
  summarise(
    start_date = min(date),
    end_date = max(date),
    total_gdd = max(cumulative_gdd_from_planting),
    mean_daily_gdd = mean(gdd),
    days_tracked = n(),
    .groups = "drop"
  )
print(gdd_summary)
## # A tibble: 3 × 6
##   environment start_date end_date   total_gdd mean_daily_gdd days_tracked
##   <chr>       <date>     <date>         <dbl>          <dbl>        <int>
## 1 CLY2025     2025-04-04 2025-07-13     1243.           12.3          101
## 2 PSU2022     2022-05-26 2022-09-03     1204.           11.9          101
## 3 PSU2025     2025-05-20 2025-08-28     1141.           11.3          101
 
Visualization
ggplot(gdd_data, aes(x = date, y = gdd, color = environment)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days",
    x = "Date",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = days_after_planting, y = gdd, color = environment)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days",
    x = "Days After Planting",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  xlim(0, 100) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = julian_day, y = gdd, color = environment)) +
  geom_line(alpha = 0.7) +
  labs(
    title = "Daily Growing Degree Days by Julian Day",
    x = "Julian Day",
    y = "Daily GDD (°C)",
    color = "Environment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "bottom"
  )

ggplot(gdd_data, aes(x = days_after_planting, y = cumulative_gdd_from_planting, 
                     color = environment)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Growing Degree Day Accumulation Across Environments",
    x = "Days After Planting",
    y = "Cumulative GDD (°C)",
    color = "Environment"
  ) +
  ylim(0, 1250) +
  xlim(0, 100) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "top"
  )

 
Export GDD Data
write_csv(gdd_data, "gdd_all_environments.csv")
write_csv(gdd_at_phenology, "gdd_lookup_table.csv")
cat("Exported files:\n")
## Exported files:
cat("- gdd_all_environments.csv\n")
## - gdd_all_environments.csv
cat("- gdd_lookup_table.csv\n")
## - gdd_lookup_table.csv