Electricity load forecasting

# Packages
library(dplyr)

## 
## Attache Paket: 'dplyr'

## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     filter, lag

## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(stringr)
library(lubridate)

## 
## Attache Paket: 'lubridate'

## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     date, intersect, setdiff, union

library(readr)
library(purrr)
library(tibble)
library(ggplot2)
library(scales)

## 
## Attache Paket: 'scales'

## Das folgende Objekt ist maskiert 'package:purrr':
## 
##     discard

## Das folgende Objekt ist maskiert 'package:readr':
## 
##     col_factor

library(patchwork)
library(zoo)

## 
## Attache Paket: 'zoo'

## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     as.Date, as.Date.numeric

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(timeDate)
library(glmnet)

## Lade nötiges Paket: Matrix

## 
## Attache Paket: 'Matrix'

## Die folgenden Objekte sind maskiert von 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-10

library(ncdf4)
library(sf)

## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE

library(rnaturalearth)
library(shiny)


# Electricity Load Data
read_one_year_clean <- function(path, year, tz = "Europe/Berlin") {

  lines <- readr::read_lines(path)

  if (length(lines) == 0) stop("Empty file: ", path)
  lines <- lines[-1]

  tibble(line = lines) %>%
   
    mutate(
      line = str_remove(line, '^"'),
      line = str_remove(line, '"$')
    ) %>%
   
    separate(
      col = line,
      into = c("time_range", "Area", "Actual_Total_Load", "Day_ahead_Total_Load_Forecast"),
      sep = ",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)",
      remove = TRUE
    ) %>%
    mutate(
      year = as.integer(year),

   
      Area = str_remove_all(Area, '""'),
      Actual_Total_Load = as.numeric(str_remove_all(Actual_Total_Load, '""')),
      Day_ahead_Total_Load_Forecast = as.numeric(str_remove_all(Day_ahead_Total_Load_Forecast, '""')),

      time = as.POSIXct(
        str_extract(time_range, "^.*(?= -)"),
        format = "%d/%m/%Y %H:%M",
        tz = tz
      )
    ) %>%
    select(year, time, Area,
           load = Actual_Total_Load,
           forecast = Day_ahead_Total_Load_Forecast) %>%
    arrange(time)
}

years <- 2015:2025

files <- setNames(
  paste0("electro", years, ".csv"),
  as.character(years)
)

all_data <- bind_rows(
  lapply(names(files), function(y) read_one_year_clean(files[[y]], y))
) %>%
  arrange(time)


str(all_data)

## tibble [385,728 × 5] (S3: tbl_df/tbl/data.frame)
##  $ year    : int [1:385728] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ time    : POSIXct[1:385728], format: "2015-01-01 00:00:00" "2015-01-01 00:15:00" ...
##  $ Area    : chr [1:385728] "Germany (DE)" "Germany (DE)" "Germany (DE)" "Germany (DE)" ...
##  $ load    : num [1:385728] 45274 44870 44964 44894 44442 ...
##  $ forecast: num [1:385728] 42955 42412 41901 41355 40710 ...

head(all_data)

## # A tibble: 6 × 5
##    year time                Area           load forecast
##   <int> <dttm>              <chr>         <dbl>    <dbl>
## 1  2015 2015-01-01 00:00:00 Germany (DE) 45274.   42955.
## 2  2015 2015-01-01 00:15:00 Germany (DE) 44870.   42412.
## 3  2015 2015-01-01 00:30:00 Germany (DE) 44964.   41901.
## 4  2015 2015-01-01 00:45:00 Germany (DE) 44894.   41355.
## 5  2015 2015-01-01 01:00:00 Germany (DE) 44442.   40710.
## 6  2015 2015-01-01 01:15:00 Germany (DE) 44057.   40204.

sum(is.na(all_data$load))

## [1] 0

analyze_year <- function(df, freq = 96) {
  load_ts <- ts(df$load, frequency = freq)
  decomp <- stl(load_ts, s.window = "periodic")
  adf <- adf.test(decomp$time.series[, "remainder"])

  tibble(
    statistic = unname(adf$statistic),
    p_value = adf$p.value,
    lags = adf$parameter
  )
}

adf_by_year <- all_data %>%
  group_by(year) %>%
  group_modify(~ analyze_year(.x, freq = 96)) %>%
  ungroup()

## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value

adf_by_year

## # A tibble: 11 × 4
##     year statistic p_value  lags
##    <int>     <dbl>   <dbl> <dbl>
##  1  2015     -37.6    0.01    32
##  2  2016     -37.8    0.01    32
##  3  2017     -37.7    0.01    32
##  4  2018     -36.7    0.01    32
##  5  2019     -36.3    0.01    32
##  6  2020     -37.6    0.01    32
##  7  2021     -37.4    0.01    32
##  8  2022     -36.6    0.01    32
##  9  2023     -35.7    0.01    32
## 10  2024     -35.6    0.01    32
## 11  2025     -35.3    0.01    32

years <- sort(unique(all_data$year))


#Load plot

plot_load_hourly <- function(df, title = "Electricity load (Germany)") {

  df_h <- df %>%
    filter(!is.na(load), is.finite(load), !is.na(time)) %>%
    mutate(time_hour = floor_date(time, "hour")) %>%
    group_by(time_hour) %>%
    summarise(load = mean(load, na.rm = TRUE), .groups = "drop")

  ggplot(df_h, aes(x = time_hour, y = load)) +
    geom_line(linewidth = 0.4, alpha = 0.9) +
    scale_x_datetime(date_breaks = "1 month", date_labels = "%b %Y") +
    scale_y_continuous(labels = scales::comma) +
    labs(
      title = title,
      subtitle = "Hourly average (smoothed for readability)",
      x = NULL,
      y = "Load (MW)"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(face = "bold"),
      panel.grid.minor = element_blank()
    )
}

#STL decomposition plot
plot_stl_pretty <- function(df, freq = 96, title = "STL decomposition") {

  df <- df %>%
    filter(!is.na(load), is.finite(load), !is.na(time)) %>%
    arrange(time)

  stopifnot(nrow(df) > freq * 2)

  ts_load <- ts(df$load, frequency = freq)
  decomp <- stl(ts_load, s.window = "periodic")

  stl_df <- tibble(
    time = df$time,
    data = as.numeric(ts_load),
    seasonal = as.numeric(decomp$time.series[, "seasonal"]),
    trend = as.numeric(decomp$time.series[, "trend"]),
    remainder = as.numeric(decomp$time.series[, "remainder"])
  )

  p1 <- ggplot(stl_df, aes(time, data)) +
    geom_line(linewidth = 0.25, alpha = 0.85) +
    scale_y_continuous(labels = comma) +
    labs(title = title, subtitle = "Observed", x = NULL, y = "MW") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(face = "bold"),
          panel.grid.minor = element_blank())

  p2 <- ggplot(stl_df, aes(time, seasonal)) +
    geom_line(linewidth = 0.25, alpha = 0.85) +
    labs(subtitle = "Seasonal", x = NULL, y = NULL) +
    theme_minimal(base_size = 12) +
    theme(panel.grid.minor = element_blank())

  p3 <- ggplot(stl_df, aes(time, trend)) +
    geom_line(linewidth = 0.35, alpha = 0.9) +
    labs(subtitle = "Trend", x = NULL, y = NULL) +
    theme_minimal(base_size = 12) +
    theme(panel.grid.minor = element_blank())

  p4 <- ggplot(stl_df, aes(time, remainder)) +
    geom_line(linewidth = 0.25, alpha = 0.85) +
    labs(subtitle = "Remainder", x = NULL, y = NULL) +
    theme_minimal(base_size = 12) +
    theme(panel.grid.minor = element_blank())

  p1 / p2 / p3 / p4 + plot_layout(heights = c(1.2, 1, 1, 1))
}


df_2025 <- all_data %>% filter(year == 2025)

p_load <- plot_load_hourly(df_2025, title = "Germany electricity load – 2025")
p_stl  <- plot_stl_pretty(df_2025, freq = 96, title = "Germany electricity load – 2025 (STL)")

print(p_load)

print(p_stl)

# Temperature function

read_era5_t2m_DE <- function(nc_path, tz_out = "Europe/Berlin") {

  nc <- ncdf4::nc_open(nc_path)
  on.exit(ncdf4::nc_close(nc), add = TRUE)

  lat <- ncdf4::ncvar_get(nc, "latitude")
  time_raw <- ncdf4::ncvar_get(nc, "valid_time")
  time_units <- ncdf4::ncatt_get(nc, "valid_time", "units")$value

  origin_str <- sub(".*since\\s+", "", time_units)
  origin_str <- sub("\\.0$", "", origin_str)
  origin <- as.POSIXct(origin_str, tz = "UTC")

  if (grepl("^hours", time_units)) {
    time_utc <- origin + lubridate::dhours(time_raw)
  } else if (grepl("^seconds", time_units)) {
    time_utc <- origin + lubridate::dseconds(time_raw)
  } else {
    stop("Unknown valid_time units: ", time_units)
  }

  # temperature
  t2m <- ncdf4::ncvar_get(nc, "t2m")
  dim_names <- sapply(nc$var[["t2m"]]$dim, `[[`, "name")

  # weights by latitude
  w_lat <- cos(lat * pi / 180)
  w_lat <- w_lat / sum(w_lat)

  # average over longitude
  if (identical(dim_names, c("longitude", "latitude", "valid_time"))) {
    # t2m[lon, lat, time] -> mean over lon -> [lat, time]
    mean_lon <- apply(t2m, c(2, 3), mean, na.rm = TRUE)
  } else if (identical(dim_names, c("valid_time", "latitude", "longitude"))) {
    # t2m[time, lat, lon] -> mean over lon -> [time, lat]
    mean_lon <- apply(t2m, c(1, 2), mean, na.rm = TRUE)
  } else if (identical(dim_names, c("latitude", "longitude", "valid_time"))) {
    # t2m[lat, lon, time] -> mean over lon -> [lat, time]
    mean_lon <- apply(t2m, c(1, 3), mean, na.rm = TRUE)
  } else {
    stop("Unexpected t2m dimension order: ", paste(dim_names, collapse = ", "))
  }

  mean_lon <- as.matrix(mean_lon)

  # weight over latitude
  if (nrow(mean_lon) == length(w_lat)) {
    # [lat, time]
    mean_area <- colSums(sweep(mean_lon, 1, w_lat, `*`), na.rm = TRUE)
  } else if (ncol(mean_lon) == length(w_lat)) {
    # [time, lat]
    mean_area <- rowSums(sweep(mean_lon, 2, w_lat, `*`), na.rm = TRUE)
  } else {
    stop("Latitude dimension mismatch: nrow=", nrow(mean_lon),
         " ncol=", ncol(mean_lon),
         " lat_len=", length(w_lat))
  }

  tibble::tibble(
    time = lubridate::with_tz(time_utc, tz_out),
    temp_c = as.numeric(mean_area - 273.15)
  ) %>%
    arrange(time)
}




files <- list.files(
  pattern = "^Temperature.*\\.nc$",
  full.names = TRUE
)

files <- files[order(basename(files))]

temp_all <- purrr::map_dfr(
  files,
  ~ read_era5_t2m_DE(.x) %>%
      mutate(
        source_file = basename(.x),
        year = lubridate::year(time)
      )
) %>%
  arrange(time) %>%
  filter(year >= 2015, year <= 2025)

glimpse(temp_all)

## Rows: 96,382
## Columns: 4
## $ time        <dttm> 2015-01-01 01:00:00, 2015-01-01 02:00:00, 2015-01-01 03:0…
## $ temp_c      <dbl> 0.6985498, 0.6214059, 0.5340014, 0.4823854, 0.4143815, 0.3…
## $ source_file <chr> "Temperature2015.nc", "Temperature2015.nc", "Temperature20…
## $ year        <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015…

print(table(temp_all$year))

## 
## 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 
## 8759 8784 8760 8760 8760 8784 8760 8760 8760 8784 8711

summary(temp_all$temp_c)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -12.236   4.317   9.591  10.062  15.533  32.559

# Germany grid map 
plot_era5_grid_over_germany <- function(nc_path) {
  nc <- ncdf4::nc_open(nc_path)
  on.exit(ncdf4::nc_close(nc), add = TRUE)

  lat <- ncdf4::ncvar_get(nc, "latitude")
  lon <- ncdf4::ncvar_get(nc, "longitude")

  grid <- expand.grid(longitude = lon, latitude = lat)
  de <- rnaturalearth::ne_countries(country = "Germany", returnclass = "sf")

  ggplot() +
    geom_sf(data = de, fill = NA, linewidth = 0.6) +
    geom_point(data = grid, aes(longitude, latitude), size = 0.15, alpha = 0.35) +
    coord_sf(xlim = range(lon), ylim = range(lat), expand = FALSE) +
    labs(
      title = "ERA5 grid points over Germany",
      x = "Longitude", y = "Latitude"
    ) +
    theme_minimal(base_size = 12) +
    theme(panel.grid.minor = element_blank())
}


p_map <- plot_era5_grid_over_germany(files[1])
print(p_map)

# Conversion of hourly temp to 15-min
temp_15m <- temp_all %>%
  group_by(year) %>%
  arrange(time, .by_group = TRUE) %>%
  complete(time = seq(min(time), max(time), by = "15 mins")) %>%
  arrange(time, .by_group = TRUE) %>%
  mutate(temp_c = zoo::na.locf(temp_c, na.rm = FALSE)) %>%
  ungroup()


sum(is.na(temp_15m$temp_c))

## [1] 0

glimpse(temp_15m)

## Rows: 385,495
## Columns: 4
## $ year        <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015…
## $ time        <dttm> 2015-01-01 01:00:00, 2015-01-01 01:15:00, 2015-01-01 01:3…
## $ temp_c      <dbl> 0.6985498, 0.6985498, 0.6985498, 0.6985498, 0.6214059, 0.6…
## $ source_file <chr> "Temperature2015.nc", NA, NA, NA, "Temperature2015.nc", NA…

#Events

years <- seq(year(min(all_data$time, na.rm = TRUE)),
             year(max(all_data$time, na.rm = TRUE)))

# Nationwide German public holidays
# Fixed-date nationwide holidays:
# - Jan 1  (New Year)
# - May 1  (Labour Day)
# - Oct 3  (German Unity Day)
# - Dec 25 (1.Christmas Day)
# - Dec 26 (2.Christmas Day)
fixed_holidays <- as.Date(c(
  paste0(years, "-01-01"),
  paste0(years, "-05-01"),
  paste0(years, "-10-03"),
  paste0(years, "-12-25"),
  paste0(years, "-12-26")
))

# Movable nationwide holidays (based on Easter):
# - Good Friday (Easter - 2)
# - Easter Monday (Easter + 1)
# - Ascension Day (Easter + 39)
# - Whit Monday / Pfingstmontag (Easter + 50)
easter_dates <- as.Date(Easter(years))

movable_holidays <- as.Date(c(
  easter_dates - 2,   # Good Friday
  easter_dates + 1,   # Easter Monday
  easter_dates + 39,  # Ascension Day
  easter_dates + 50   # Whit Monday
))

# Final holiday set
holidays_de_nationwide <- unique(c(fixed_holidays, movable_holidays))



#Adding the holidays to the dataset
df <- all_data %>%
  mutate(
    date = as.Date(time),

    # Weekend (Saturday, Sunday)
    is_weekend = as.integer(wday(date, week_start = 1) %in% c(6, 7)),

    # Nationwide holiday
    is_holiday = as.integer(date %in% holidays_de_nationwide),

    # Workday = not weekend and not holiday
    is_workday = as.integer(is_weekend == 0 & is_holiday == 0),

    # Time features
    hour  = hour(time),
    dow   = wday(time, week_start = 1),  # day of the week
    month = month(time),

    # Day/Night
    is_night = as.integer(hour < 7 | hour >= 22),
    is_day   = as.integer(!is_night),

    # 4-part time of day
    time_of_day = case_when(
      hour < 6  ~ "night",
      hour < 12 ~ "morning",
      hour < 18 ~ "day",
      TRUE      ~ "evening"
    ),
    time_of_day = factor(time_of_day,
                         levels = c("night", "morning", "day", "evening"))
  )

#Sanity checks 
df %>% count(is_weekend, is_holiday, is_workday)

## # A tibble: 4 × 4
##   is_weekend is_holiday is_workday      n
##        <int>      <int>      <int>  <int>
## 1          0          0          1 267456
## 2          0          1          0   8064
## 3          1          0          0 108768
## 4          1          1          0   1440

df %>%
  group_by(is_night) %>%
  summarise(mean_load = mean(load, na.rm = TRUE), .groups = "drop")

## # A tibble: 2 × 2
##   is_night mean_load
##      <int>     <dbl>
## 1        0    60627.
## 2        1    47764.

df %>%
  filter(is_holiday == 1) %>%
  distinct(date) %>%
  arrange(date) %>%
  head(30)

## # A tibble: 30 × 1
##    date      
##    <date>    
##  1 2015-01-01
##  2 2015-04-03
##  3 2015-04-06
##  4 2015-05-01
##  5 2015-05-14
##  6 2015-05-25
##  7 2015-10-03
##  8 2015-12-25
##  9 2015-12-26
## 10 2016-01-01
## # ℹ 20 more rows

######################################################################################################################################################################################################################################################################################################################################################################################################


#Filter to common period

df_2015_2025 <- df %>%
  filter(year >= 2015, year <= 2025)

temp_2015_2025 <- temp_15m %>%
  filter(year >= 2015, year <= 2025)


#Filling the data

temp_2015_2025_clean <- temp_2015_2025 %>%
  select(time, temp_c) %>%
  arrange(time) %>%
  distinct(time, .keep_all = TRUE) %>%
  mutate(
    temp_c = zoo::na.locf(temp_c, na.rm = FALSE),
    temp_c = zoo::na.locf(temp_c, fromLast = TRUE, na.rm = FALSE)
  )



#Merging the data

df_model_2015_2025 <- df_2015_2025 %>%
  left_join(temp_2015_2025_clean, by = "time") %>%
  mutate(
    log_load = log(load),
    date = as.Date(with_tz(time, "Europe/Berlin"))
  ) %>%
  arrange(time)


cat("Rows:", nrow(df_model_2015_2025), "\n")

## Rows: 385728

cat("Missing temp_c:", sum(is.na(df_model_2015_2025$temp_c)), "\n")

## Missing temp_c: 233

range(df_model_2015_2025$time)

## [1] "2015-01-01 00:00:00 CET" "2025-12-31 23:45:00 CET"

table(df_model_2015_2025$year)

## 
##  2015  2016  2017  2018  2019  2020  2021  2022  2023  2024  2025 
## 35040 35136 35040 35040 35040 35136 35040 35040 35040 35136 35040

######################################################################################################################################################################################################################################################################################################################################################################################################


#Check for correlation

vars_for_cor <- df_model_2015_2025 %>%
  select(
    log_load,
    temp_c,
    is_weekend,
    is_holiday,
    is_night
  )

cor(vars_for_cor, use = "complete.obs")

##              log_load       temp_c    is_weekend    is_holiday      is_night
## log_load    1.0000000 -0.153321837 -4.592725e-01 -1.730015e-01 -6.293411e-01
## temp_c     -0.1533218  1.000000000 -8.893770e-04 -3.554656e-02 -1.934827e-01
## is_weekend -0.4592725 -0.000889377  1.000000e+00 -4.727218e-02 -2.074786e-06
## is_holiday -0.1730015 -0.035546561 -4.727218e-02  1.000000e+00  2.183079e-05
## is_night   -0.6293411 -0.193482680 -2.074786e-06  2.183079e-05  1.000000e+00

# log_load vs night dummy
boxplot(log_load ~ is_night,
        data = df_model_2015_2025,
        names = c("Day", "Night"),
        main = "Day vs Night electricity consumption",
        ylab = "log(load)")

# log_load vs weekend
boxplot(log_load ~ is_weekend,
        data = df_model_2015_2025,
        names = c("Weekday", "Weekend"),
        main = "Weekday vs Weekend electricity consumption",
        ylab = "log(load)")

mm1 <- lm(
  log_load ~ temp_c + is_night + is_weekend + is_holiday,
  data = df_model_2015_2025
)
summary(mm1)

## 
## Call:
## lm(formula = log_load ~ temp_c + is_night + is_weekend + is_holiday, 
##     data = df_model_2015_2025)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46483 -0.06083  0.00513  0.06522  0.49381 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.115e+01  3.274e-04 34041.7   <2e-16 ***
## temp_c      -7.457e-03  2.174e-05  -342.9   <2e-16 ***
## is_night    -2.590e-01  3.231e-04  -801.4   <2e-16 ***
## is_weekend  -1.897e-01  3.400e-04  -558.0   <2e-16 ***
## is_holiday  -2.422e-01  9.915e-04  -244.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09529 on 385490 degrees of freedom
##   (233 Beobachtungen als fehlend gelöscht)
## Multiple R-squared:  0.728,  Adjusted R-squared:  0.728 
## F-statistic: 2.579e+05 on 4 and 385490 DF,  p-value: < 2.2e-16

# A1) Heating/Cooling degree variables

T_base <- 18

df_model_2015_2025 <- df_model_2015_2025 %>%
  mutate(
    HDD = pmax(0, T_base - temp_c),
    CDD = pmax(0, temp_c - T_base)
  )

# Sanity check
summary(df_model_2015_2025$HDD)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   2.466   8.408   8.459  13.683  30.236     233

summary(df_model_2015_2025$CDD)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.000   0.000   0.522   0.000  14.559     233

# A2) Lags

df_model_2015_2025 <- df_model_2015_2025 %>%
  arrange(time) %>%
  mutate(
    lag_1   = dplyr::lag(log_load, 1),
    lag_96  = dplyr::lag(log_load, 96),
    lag_672 = dplyr::lag(log_load, 672)
  )


colSums(is.na(df_model_2015_2025[, c("lag_1","lag_96","lag_672")]))

##   lag_1  lag_96 lag_672 
##       1      96     672

#Seasonal Fourier Features: sin/cos (day/week/year)

df_model_2015_2025 <- df_model_2015_2025 %>%
  mutate(
   
    time_local = with_tz(time, "Europe/Berlin"),

    #15-min slot within day
    slot_day = hour(time_local) * 4 + minute(time_local) %/% 15 + 1,

    sin_day = sin(2 * pi * slot_day / 96),
    cos_day = cos(2 * pi * slot_day / 96),

    #week cycle 
    dow = wday(time_local, week_start = 1),
    sin_week = sin(2 * pi * dow / 7),
    cos_week = cos(2 * pi * dow / 7),

    #year cycle 
    doy = yday(time_local),
    sin_year = sin(2 * pi * doy / 365),
    cos_year = cos(2 * pi * doy / 365)
  ) %>%
  select(-time_local, -slot_day)

summary(select(
  df_model_2015_2025,
  sin_day, cos_day,
  sin_week, cos_week,
  sin_year, cos_year
))

##     sin_day           cos_day           sin_week          cos_week      
##  Min.   :-1.0000   Min.   :-1.0000   Min.   :-0.9749   Min.   :-0.9010  
##  1st Qu.:-0.7071   1st Qu.:-0.7071   1st Qu.:-0.7818   1st Qu.:-0.9010  
##  Median : 0.0000   Median : 0.0000   Median : 0.0000   Median :-0.2225  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.7071   3rd Qu.: 0.7071   3rd Qu.: 0.7818   3rd Qu.: 0.6235  
##  Max.   : 1.0000   Max.   : 1.0000   Max.   : 0.9749   Max.   : 1.0000  
##     sin_year             cos_year         
##  Min.   :-0.9999907   Min.   :-0.9999630  
##  1st Qu.:-0.7055836   1st Qu.:-0.7086267  
##  Median : 0.0000000   Median : 0.0043035  
##  Mean   :-0.0002024   Mean   : 0.0007908  
##  3rd Qu.: 0.7055836   3rd Qu.: 0.7025275  
##  Max.   : 0.9999907   Max.   : 1.0000000

m_seasonal_linear <- lm(
  log_load ~
    temp_c +
    is_night +
    is_weekend +
    is_holiday,
  data = df_model_2015_2025
)

summary(m_seasonal_linear)

## 
## Call:
## lm(formula = log_load ~ temp_c + is_night + is_weekend + is_holiday, 
##     data = df_model_2015_2025)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46483 -0.06083  0.00513  0.06522  0.49381 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.115e+01  3.274e-04 34041.7   <2e-16 ***
## temp_c      -7.457e-03  2.174e-05  -342.9   <2e-16 ***
## is_night    -2.590e-01  3.231e-04  -801.4   <2e-16 ***
## is_weekend  -1.897e-01  3.400e-04  -558.0   <2e-16 ***
## is_holiday  -2.422e-01  9.915e-04  -244.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09529 on 385490 degrees of freedom
##   (233 Beobachtungen als fehlend gelöscht)
## Multiple R-squared:  0.728,  Adjusted R-squared:  0.728 
## F-statistic: 2.579e+05 on 4 and 385490 DF,  p-value: < 2.2e-16

par(mfrow = c(1,2))

plot(
  fitted(m_seasonal_linear),
  resid(m_seasonal_linear),
  main = "Seasonal linear: Residuals vs Fitted",
  xlab = "Fitted", ylab = "Residuals"
)

hist(
  resid(m_seasonal_linear),
  breaks = 50,
  main = "Seasonal linear: Residuals distribution",
  xlab = "Residuals"
)

m_full <- lm(
  log_load ~
    HDD + CDD +
    is_night + is_weekend + is_holiday +
    sin_day + cos_day +
    sin_week + cos_week +
    sin_year + cos_year +
    lag_1 + lag_96 + lag_672,
  data = df_model_2015_2025
)

summary(m_full)

## 
## Call:
## lm(formula = log_load ~ HDD + CDD + is_night + is_weekend + is_holiday + 
##     sin_day + cos_day + sin_week + cos_week + sin_year + cos_year + 
##     lag_1 + lag_96 + lag_672, data = df_model_2015_2025)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33150 -0.00654 -0.00162  0.00471  0.30760 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.165e-02  2.262e-03   40.51   <2e-16 ***
## HDD          2.291e-04  6.031e-06   38.00   <2e-16 ***
## CDD          1.949e-04  1.181e-05   16.50   <2e-16 ***
## is_night    -4.024e-03  7.161e-05  -56.19   <2e-16 ***
## is_weekend  -3.506e-03  8.288e-05  -42.30   <2e-16 ***
## is_holiday  -6.808e-03  1.247e-04  -54.60   <2e-16 ***
## sin_day      8.299e-03  3.964e-05  209.39   <2e-16 ***
## cos_day     -5.871e-03  4.171e-05 -140.74   <2e-16 ***
## sin_week     7.182e-04  2.805e-05   25.61   <2e-16 ***
## cos_week     8.395e-04  4.393e-05   19.11   <2e-16 ***
## sin_year    -5.078e-04  2.854e-05  -17.79   <2e-16 ***
## cos_year    -7.879e-04  5.016e-05  -15.71   <2e-16 ***
## lag_1        9.667e-01  3.243e-04 2980.52   <2e-16 ***
## lag_96       6.237e-03  2.370e-04   26.32   <2e-16 ***
## lag_672      1.875e-02  2.583e-04   72.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01023 on 384812 degrees of freedom
##   (901 Beobachtungen als fehlend gelöscht)
## Multiple R-squared:  0.9969, Adjusted R-squared:  0.9969 
## F-statistic: 8.739e+06 on 14 and 384812 DF,  p-value: < 2.2e-16

summary(m_seasonal_linear)$r.squared

## [1] 0.72798

summary(m_full)$r.squared

## [1] 0.9968647

# BACKTEST 
df_bt <- df_model_2015_2025 %>%
  arrange(time) %>%
  filter(!is.na(log_load), !is.na(temp_c))
predict_one_day <- function(train_df, test_df, formula) {


  fit <- lm(formula, data = train_df)

  
  pred_log <- predict(fit, newdata = test_df)

  test_df %>%
    mutate(pred_log = pred_log,
           pred = exp(pred_log))
}
backtest_day_ahead <- function(df, dates, formula) {

  res <- vector("list", length(dates))

  for (i in seq_along(dates)) {

    d <- dates[i]

    train_df <- df %>% filter(date < d)
    test_df  <- df %>% filter(date == d)

    
    if (nrow(train_df) < 365 * 96 || nrow(test_df) != 96) {
      res[[i]] <- NULL
      next
    }

    pred_df <- predict_one_day(train_df, test_df, formula)

    res[[i]] <- pred_df %>%
      select(time, load, pred) %>%
      mutate(date = d)
  }

  bind_rows(res)
}
f_seasonal_linear <- log_load ~
  temp_c +
  is_night +
  is_weekend +
  is_holiday

f_full <- log_load ~
  HDD + CDD +
  is_night + is_weekend + is_holiday +
  sin_day + cos_day +
  sin_week + cos_week +
  sin_year + cos_year +
  lag_1 + lag_96 + lag_672

f_full_no_lag1 <- log_load ~
  HDD + CDD +
  is_night + is_weekend + is_holiday +
  sin_day + cos_day +
  sin_week + cos_week +
  sin_year + cos_year +
  lag_96 + lag_672


dates_bt <- seq(as.Date("2020-01-01"),
                as.Date("2020-03-31"),
                by = "day")

bt_seasonal <- backtest_day_ahead(df_bt, dates_bt, f_seasonal_linear)
bt_full     <- backtest_day_ahead(df_bt, dates_bt, f_full)
bt_full_no_lag1 <- backtest_day_ahead(df_bt, dates_bt, f_full_no_lag1)


calc_metrics <- function(df) {

  y_true <- NULL
  y_hat  <- NULL

  if ("load" %in% names(df)) y_true <- df$load
  if ("pred_load" %in% names(df)) y_hat <- df$pred_load
  if (is.null(y_hat) && "pred" %in% names(df)) y_hat <- df$pred


  if (is.null(y_true) || is.null(y_hat)) {
    return(tibble(note = "Metrics cannot be computed"))
  }

  
  if (!any(!is.na(y_true))) {
    return(tibble(note = "No actual load available"))
  }

  tibble(
    n = sum(!is.na(y_true) & !is.na(y_hat)),
    rmse = sqrt(mean((y_hat - y_true)^2, na.rm = TRUE)),
    mae  = mean(abs(y_hat - y_true), na.rm = TRUE),
    mape = mean(abs(y_hat - y_true) / pmax(y_true, 1e-6), na.rm = TRUE) * 100
  )
}

metrics_seasonal <- calc_metrics(bt_seasonal)
metrics_full     <- calc_metrics(bt_full)
metrics_full_no_lag1 <- calc_metrics(bt_full_no_lag1)

metrics_full_no_lag1

## # A tibble: 1 × 4
##       n  rmse   mae  mape
##   <int> <dbl> <dbl> <dbl>
## 1  8736 3041. 2246.  3.81

metrics_seasonal

## # A tibble: 1 × 4
##       n  rmse   mae  mape
##   <int> <dbl> <dbl> <dbl>
## 1  8736 5087. 4066.  6.95

metrics_full

## # A tibble: 1 × 4
##       n  rmse   mae  mape
##   <int> <dbl> <dbl> <dbl>
## 1  8736  617.  449. 0.738

d_show <- as.Date("2020-01-15")

ggplot() +
  geom_line(data = bt_seasonal %>% filter(date == d_show),
            aes(time, load), colour = "black") +
  geom_line(data = bt_seasonal %>% filter(date == d_show),
            aes(time, pred), colour = "red", linetype = "dashed") +
  geom_line(data = bt_full %>% filter(date == d_show),
            aes(time, pred), colour = "blue", linetype = "dashed") +
  geom_line(data = bt_full_no_lag1 %>% filter(date == d_show),
            aes(time, pred), colour = "darkgreen", linetype = "dashed") +
  labs(
    title = "Day-ahead forecast comparison",
    subtitle = "Black=actual, Red=seasonal linear, Blue=full (with lag_1), Green=full (no lag_1)",
    y = "Load (MW)", x = NULL
  ) +
  theme_minimal()

#Shiny

# Safety

assert_has_cols <- function(df, cols, df_name = "df") {
  miss <- setdiff(cols, names(df))
  if (length(miss) > 0) stop(df_name, " is missing columns: ", paste(miss, collapse = ", "))
  invisible(TRUE)
}


# Temperature

make_temp_climatology_15m <- function(train_df, tz = "Europe/Berlin") {
  assert_has_cols(train_df, c("time", "temp_c"), "train_df")

  train_df %>%
    filter(!is.na(temp_c)) %>%
    mutate(
      time_local = with_tz(time, tz),
      doy    = yday(time_local),
      slot15 = hour(time_local) * 4 + minute(time_local) %/% 15 + 1
    ) %>%
    group_by(doy, slot15) %>%
    summarise(temp_c_clim = mean(temp_c), .groups = "drop")
}

add_temp_by_scenario <- function(future_df, full_df, train_df,
                                 scenario = c("clim_15m", "persistence"),
                                 tz = "Europe/Berlin") {
  scenario <- match.arg(scenario)
  assert_has_cols(future_df, c("time"), "future_df")
  assert_has_cols(full_df, c("time","temp_c"), "full_df")
  assert_has_cols(train_df, c("time","temp_c"), "train_df")

  if (scenario == "clim_15m") {
    clim <- make_temp_climatology_15m(train_df, tz = tz)

    out <- future_df %>%
      mutate(
        time_local = with_tz(time, tz),
        doy    = yday(time_local),
        slot15 = hour(time_local) * 4 + minute(time_local) %/% 15 + 1
      ) %>%
      left_join(clim, by = c("doy","slot15")) %>%
      transmute(time = time, temp_c = temp_c_clim)

  } else { # persistence
    tmp <- full_df %>%
      arrange(time) %>%
      mutate(temp_c_lag96 = dplyr::lag(temp_c, 96)) %>%
      select(time, temp_c_lag96)

    out <- future_df %>%
      left_join(tmp, by = "time") %>%
      transmute(time = time, temp_c = temp_c_lag96)
  }

  out %>%
    arrange(time) %>%
    mutate(
      temp_c = zoo::na.locf(temp_c, na.rm = FALSE),
      temp_c = zoo::na.locf(temp_c, fromLast = TRUE, na.rm = FALSE)
    )
}


# 2) Parameters

make_future_features <- function(future_df, holidays_vec,
                                 tz = "Europe/Berlin", T_base = 18) {
  assert_has_cols(future_df, c("time","temp_c"), "future_df")

  future_df %>%
    arrange(time) %>%
    mutate(
      time_local = with_tz(time, tz),
      date       = as.Date(time_local),

      is_weekend = as.integer(wday(date, week_start = 1) %in% c(6,7)),
      is_holiday = as.integer(date %in% holidays_vec),
      is_night   = as.integer(hour(time_local) < 7 | hour(time_local) >= 22),

      HDD = pmax(0, T_base - temp_c),
      CDD = pmax(0, temp_c - T_base),

      slot_day = hour(time_local) * 4 + minute(time_local) %/% 15 + 1,
      sin_day  = sin(2*pi*slot_day/96),
      cos_day  = cos(2*pi*slot_day/96),

      dow      = wday(time_local, week_start = 1),
      sin_week = sin(2*pi*dow/7),
      cos_week = cos(2*pi*dow/7),

      doy      = yday(time_local),
      sin_year = sin(2*pi*doy/365),
      cos_year = cos(2*pi*doy/365)
    ) %>%
    select(-time_local, -slot_day)
}


# 3) Models

get_model_formula <- function(model_type = c("seasonal", "full"),
                              use_lag1 = FALSE) {
  model_type <- match.arg(model_type)

  if (model_type == "seasonal") {
    fml <- log_load ~
      temp_c +
      is_night + is_weekend + is_holiday +
      sin_day + cos_day +
      sin_week + cos_week +
      sin_year + cos_year
  }

  if (model_type == "full") {
    if (use_lag1) {
      fml <- log_load ~
        HDD + CDD +
        is_night + is_weekend + is_holiday +
        sin_day + cos_day +
        sin_week + cos_week +
        sin_year + cos_year +
        lag_1 + lag_96 + lag_672
    } else {
      fml <- log_load ~
        HDD + CDD +
        is_night + is_weekend + is_holiday +
        sin_day + cos_day +
        sin_week + cos_week +
        sin_year + cos_year +
        lag_96 + lag_672
    }
  }

  fml
}


# 4) Day-ahead forecasting

predict_day_ahead_real <- function(full_df, day_date,
                                   model_type = c("seasonal","full"),
                                   temp_scenario = c("clim_15m","persistence"),
                                   holidays_vec,
                                   tz = "Europe/Berlin",
                                   T_base = 18,
                                   use_lag1 = FALSE,
                                   min_train_days = 365) {

  model_type    <- match.arg(model_type)
  temp_scenario <- match.arg(temp_scenario)

  assert_has_cols(full_df, c("time","load","log_load","temp_c"), "full_df")

  cutoff <- as.POSIXct(paste0(as.Date(day_date), " 00:00:00"), tz = tz)
  train_df <- full_df %>% filter(time < cutoff)

  if (nrow(train_df) < min_train_days * 96) {
    stop("Not enough training history before ", as.character(day_date),
         ". Need at least ", min_train_days, " days.")
  }

  future_times <- full_df %>%
    mutate(date_local = as.Date(with_tz(time, tz))) %>%
    filter(date_local == as.Date(day_date)) %>%
    arrange(time) %>%
    pull(time)

  if (length(future_times) == 0) stop("No timestamps found for selected date.")

  future_df <- tibble(time = future_times)

  future_df <- add_temp_by_scenario(
    future_df = future_df,
    full_df   = full_df,
    train_df  = train_df,
    scenario  = temp_scenario,
    tz = tz
  )

  future_df <- make_future_features(
    future_df,
    holidays_vec = holidays_vec,
    tz = tz,
    T_base = T_base
  )

  lag_map <- full_df %>%
    arrange(time) %>%
    mutate(
      lag_1   = dplyr::lag(log_load, 1),
      lag_96  = dplyr::lag(log_load, 96),
      lag_672 = dplyr::lag(log_load, 672)
    ) %>%
    select(time, lag_1, lag_96, lag_672)

  future_df <- future_df %>% left_join(lag_map, by = "time")

  fml <- get_model_formula(model_type = model_type, use_lag1 = use_lag1)
  needed <- setdiff(all.vars(fml), "log_load")
  assert_has_cols(train_df, c("log_load", needed), "train_df")

  train_fit <- train_df %>% select(log_load, all_of(needed)) %>% drop_na()
  model <- lm(fml, data = train_fit)

  pred_log <- predict(model, newdata = future_df)

  out <- future_df %>%
    mutate(
      pred_log_load = as.numeric(pred_log),
      pred_load     = exp(pred_log_load)
    )

  actual_df <- full_df %>%
    mutate(date_local = as.Date(with_tz(time, tz))) %>%
    filter(date_local == as.Date(day_date)) %>%
    select(time, load)

  out %>%
    left_join(actual_df, by = "time") %>%
    mutate(
      date = as.Date(day_date),
      model_type = model_type,
      temp_scenario = temp_scenario
    )
}


# 5) Day-ahead forecasting for 7 days

predict_week_real <- function(full_df, start_date,
                              model_type = c("seasonal","full"),
                              temp_scenario = c("clim_15m","persistence"),
                              holidays_vec,
                              tz = "Europe/Berlin",
                              T_base = 18,
                              use_lag1 = FALSE,
                              min_train_days = 365,
                              horizon_days = 7) {

  model_type    <- match.arg(model_type)
  temp_scenario <- match.arg(temp_scenario)

  days <- seq(as.Date(start_date), by = "day", length.out = horizon_days)

  res <- lapply(days, function(d) {
    predict_day_ahead_real(
      full_df = full_df,
      day_date = d,
      model_type = model_type,
      temp_scenario = temp_scenario,
      holidays_vec = holidays_vec,
      tz = tz,
      T_base = T_base,
      use_lag1 = use_lag1,
      min_train_days = min_train_days
    )
  })

  bind_rows(res)
}

calc_metrics <- function(df) {
  if (!any(!is.na(df$load))) {
    return(tibble(note = "No actual load available"))
  }
  tibble(
    n = sum(!is.na(df$load) & !is.na(df$pred_load)),
    rmse = sqrt(mean((df$pred_load - df$load)^2, na.rm = TRUE)),
    mae  = mean(abs(df$pred_load - df$load), na.rm = TRUE),
    mape = mean(abs(df$pred_load - df$load) / pmax(df$load, 1e-6), na.rm = TRUE) * 100
  )
}


# 6) Data

TZ_APP <- "Europe/Berlin"

df_bt <- df_model_2015_2025 %>%
  filter(!is.na(temp_c), !is.na(load), !is.na(log_load)) %>%
  arrange(time)

available_dates <- df_bt %>%
  mutate(date_local = as.Date(with_tz(time, TZ_APP))) %>%
  distinct(date_local) %>%
  arrange(date_local) %>%
  pull(date_local)

max_week_start <- max(available_dates) - 6


# 7) UI: pages 1-3 (Day-ahead forecasting, Day-ahead forecasting for 7 days)

ui <- fluidPage(
  titlePanel("Real forecast (no leakage)"),

  tabsetPanel(
    tabPanel(
      "1) Day-ahead (1 day)",
      sidebarLayout(
        sidebarPanel(
          helpText("The forecast for the selected day is built only from the past"),

          checkboxGroupInput(
            "d_models",
            "Models to show",
            choices = c("seasonal", "full"),
            selected = c("seasonal", "full")
          ),

          selectInput("d_temp", "Temperature scenario",
                      choices = c("clim_15m", "persistence"),
                      selected = "clim_15m"),

          dateInput("d_day", "Forecast day",
                    value = tail(available_dates, 1),
                    min = min(available_dates),
                    max = max(available_dates)),

          checkboxInput("d_show_actual", "Overlay actual (evaluation)", TRUE),
          checkboxInput("d_show_points", "Show points", FALSE),

          tags$hr(),
          checkboxInput("d_use_lag1", "Use lag_1 (full only; usually OFF day-ahead)", FALSE)
        ),
        mainPanel(
          uiOutput("d_status"),
          plotOutput("d_plot", height = 450),
          tableOutput("d_metrics")
        )
      )
    ),

    tabPanel(
      "2) Weekly (seasonal)",
      sidebarLayout(
        sidebarPanel(
          helpText("Day-ahead forecasting for 7 days"),
          strong("Model: seasonal (fixed)"),
          selectInput("s_temp", "Temperature scenario", choices = c("clim_15m", "persistence"), selected = "clim_15m"),
          dateInput("s_start", "Week start date",
                    value = min(max_week_start, tail(available_dates, 7)[1]),
                    min = min(available_dates),
                    max = max_week_start),
          checkboxInput("s_show_actual", "Overlay actual (evaluation)", TRUE),
          checkboxInput("s_show_points", "Show points", FALSE)
        ),
        mainPanel(
          uiOutput("s_status"),
          plotOutput("s_plot", height = 450),
          tableOutput("s_metrics")
        )
      )
    ),

    tabPanel(
      "3) Weekly (full)",
      sidebarLayout(
        sidebarPanel(
          helpText("Day-ahead forecasting for 7 days"),
          strong("Model: full (fixed)"),
          selectInput("f_temp", "Temperature scenario", choices = c("clim_15m", "persistence"), selected = "clim_15m"),
          dateInput("f_start", "Week start date",
                    value = min(max_week_start, tail(available_dates, 7)[1]),
                    min = min(available_dates),
                    max = max_week_start),
          checkboxInput("f_show_actual", "Overlay actual (evaluation)", TRUE),
          checkboxInput("f_show_points", "Show points", FALSE),
          tags$hr(),
          checkboxInput("f_use_lag1", "Use lag_1 (usually OFF for stability)", FALSE)
        ),
        mainPanel(
          uiOutput("f_status"),
          plotOutput("f_plot", height = 450),
          tableOutput("f_metrics")
        )
      )
    )
  )
)


# 8) Server 

server <- function(input, output, session) {

  # TAB 1: 1 day 
  day_fc_all <- reactive({
    req(input$d_temp, input$d_day)

    models_sel <- input$d_models
    if (is.null(models_sel) || length(models_sel) == 0) {
      return(structure(simpleError("Select at least one model."), class = "error"))
    }

    res_list <- lapply(models_sel, function(m) {
      tryCatch(
        predict_day_ahead_real(
          full_df = df_bt,
          day_date = as.Date(input$d_day),
          model_type = m,
          temp_scenario = input$d_temp,
          holidays_vec = holidays_de_nationwide,
          tz = TZ_APP,
          T_base = 18,
          use_lag1 = if (m == "full") isTRUE(input$d_use_lag1) else FALSE,
          min_train_days = 365
        ),
        error = function(e) e
      )
    })

  
    first_err <- Filter(function(x) inherits(x, "error"), res_list)
    if (length(first_err) > 0) return(first_err[[1]])

    bind_rows(res_list)
  })

  output$d_status <- renderUI({
    obj <- day_fc_all()
    if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
    tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj),
                                        " | models: ", paste(unique(obj$model_type), collapse = ", "),
                                        " | temp: ", input$d_temp,
                                        " | date: ", as.character(input$d_day)))
  })

  output$d_plot <- renderPlot({
    obj <- day_fc_all()
    validate(need(!inherits(obj, "error"), "No forecast due to error"))
    dfp <- obj %>% arrange(time)

  
    actual <- dfp %>%
      select(time, load) %>%
      distinct()

    p <- ggplot() +
      theme_minimal() +
      labs(
        x = NULL, y = "MW",
        title = paste("Day-ahead forecast:", input$d_temp),
        subtitle = paste("Day:", as.character(input$d_day))
      )

    if (isTRUE(input$d_show_actual) && any(!is.na(actual$load))) {
      p <- p + geom_line(data = actual, aes(x = time, y = load), na.rm = TRUE)
    }

  
    p <- p +
      geom_line(
        data = dfp,
        aes(x = time, y = pred_load, colour = model_type),
        linetype = "dashed",
        na.rm = TRUE
      )

    if (isTRUE(input$d_show_points)) {
      p <- p +
        geom_point(
          data = dfp,
          aes(x = time, y = pred_load, colour = model_type),
          size = 0.7,
          na.rm = TRUE
        )
    }

    p
  })

  output$d_metrics <- renderTable({
  obj <- day_fc_all()
  if (inherits(obj, "error")) return(NULL)

  by_model <- obj %>%
    mutate(date = as.Date(date)) %>%                      
    group_by(model_type, temp_scenario, date) %>%
    group_modify(~ calc_metrics(.x)) %>%
    ungroup() %>%
    arrange(model_type) %>%
    mutate(date = format(date, "%Y-%m-%d"))                

  by_model
})



  # TAB 2: weekly seasonal 
  week_seasonal <- reactive({
    req(input$s_temp, input$s_start)

    tryCatch(
      predict_week_real(
        full_df = df_bt,
        start_date = as.Date(input$s_start),
        model_type = "seasonal",
        temp_scenario = input$s_temp,
        holidays_vec = holidays_de_nationwide,
        tz = TZ_APP,
        T_base = 18,
        use_lag1 = FALSE,
        min_train_days = 365,
        horizon_days = 7
      ),
      error = function(e) e
    )
  })

  output$s_status <- renderUI({
    obj <- week_seasonal()
    if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
    tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj), " (7 days × 96)"))
  })

  output$s_plot <- renderPlot({
    obj <- week_seasonal()
    validate(need(!inherits(obj, "error"), "No forecast due to error"))
    dfp <- obj %>% arrange(time)

    p <- ggplot(dfp, aes(x = time)) +
      geom_line(aes(y = pred_load), linetype = "dashed", na.rm = TRUE) +
      theme_minimal() +
      labs(
        x = NULL, y = "MW",
        title = paste("Weekly forecast: seasonal /", input$s_temp),
        subtitle = paste("Week start:", as.character(input$s_start))
      )

    if (isTRUE(input$s_show_actual) && any(!is.na(dfp$load))) p <- p + geom_line(aes(y = load), na.rm = TRUE)
    if (isTRUE(input$s_show_points)) p <- p + geom_point(aes(y = pred_load), size = 0.5, na.rm = TRUE)

    p
  })

  output$s_metrics <- renderTable({
    obj <- week_seasonal()
    if (inherits(obj, "error")) return(NULL)

    by_day <- obj %>%
      group_by(date) %>%
      group_modify(~ calc_metrics(.x)) %>%
      ungroup() %>%
      mutate(date = as.Date(date))

    overall <- calc_metrics(obj) %>% mutate(note = "overall_week")

    bind_rows(
      by_day %>% mutate(scope = "day") %>% relocate(scope, .before = 1),
      overall %>% mutate(scope = "week") %>% select(scope, everything())
    )%>%
    mutate(date = ifelse(is.na(date), "", as.character(as.Date(date))))
  })


  #TAB 3: weekly full
  week_full <- reactive({
    req(input$f_temp, input$f_start)

    tryCatch(
      predict_week_real(
        full_df = df_bt,
        start_date = as.Date(input$f_start),
        model_type = "full",
        temp_scenario = input$f_temp,
        holidays_vec = holidays_de_nationwide,
        tz = TZ_APP,
        T_base = 18,
        use_lag1 = isTRUE(input$f_use_lag1),
        min_train_days = 365,
        horizon_days = 7
      ),
      error = function(e) e
    )
  })

  output$f_status <- renderUI({
    obj <- week_full()
    if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
    tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj), " (7 days × 96)"))
  })

  output$f_plot <- renderPlot({
    obj <- week_full()
    validate(need(!inherits(obj, "error"), "No forecast due to error"))
    dfp <- obj %>% arrange(time)

    p <- ggplot(dfp, aes(x = time)) +
      geom_line(aes(y = pred_load), linetype = "dashed", na.rm = TRUE) +
      theme_minimal() +
      labs(
        x = NULL, y = "MW",
        title = paste("Weekly forecast: full /", input$f_temp),
        subtitle = paste("Week start:", as.character(input$f_start),
                         "| lag_1:", ifelse(isTRUE(input$f_use_lag1), "ON", "OFF"))
      )

    if (isTRUE(input$f_show_actual) && any(!is.na(dfp$load))) p <- p + geom_line(aes(y = load), na.rm = TRUE)
    if (isTRUE(input$f_show_points)) p <- p + geom_point(aes(y = pred_load), size = 0.5, na.rm = TRUE)

    p
  })

  output$f_metrics <- renderTable({
    obj <- week_full()
    if (inherits(obj, "error")) return(NULL)

    by_day <- obj %>%
      group_by(date) %>%
      group_modify(~ calc_metrics(.x)) %>%
      ungroup()%>%
      mutate(date = as.Date(date))

    overall <- calc_metrics(obj) %>% mutate(note = "overall_week")

    bind_rows(
      by_day %>% mutate(scope = "day") %>% relocate(scope, .before = 1),
      overall %>% mutate(scope = "week") %>% select(scope, everything())
    )%>%
    mutate(date = ifelse(is.na(date), "", as.character(as.Date(date))))
  })
}

shinyApp(ui, server)

Shiny applications not supported in static R Markdown documents

Electricity load forecasting

Karlova Diana

2026-02-01