# Packages
library(dplyr)
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(lubridate)
##
## Attache Paket: 'lubridate'
## Die folgenden Objekte sind maskiert von 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(purrr)
library(tibble)
library(ggplot2)
library(scales)
##
## Attache Paket: 'scales'
## Das folgende Objekt ist maskiert 'package:purrr':
##
## discard
## Das folgende Objekt ist maskiert 'package:readr':
##
## col_factor
library(patchwork)
library(zoo)
##
## Attache Paket: 'zoo'
## Die folgenden Objekte sind maskiert von 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(timeDate)
library(glmnet)
## Lade nötiges Paket: Matrix
##
## Attache Paket: 'Matrix'
## Die folgenden Objekte sind maskiert von 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-10
library(ncdf4)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(rnaturalearth)
library(shiny)
# Electricity Load Data
read_one_year_clean <- function(path, year, tz = "Europe/Berlin") {
lines <- readr::read_lines(path)
if (length(lines) == 0) stop("Empty file: ", path)
lines <- lines[-1]
tibble(line = lines) %>%
mutate(
line = str_remove(line, '^"'),
line = str_remove(line, '"$')
) %>%
separate(
col = line,
into = c("time_range", "Area", "Actual_Total_Load", "Day_ahead_Total_Load_Forecast"),
sep = ",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)",
remove = TRUE
) %>%
mutate(
year = as.integer(year),
Area = str_remove_all(Area, '""'),
Actual_Total_Load = as.numeric(str_remove_all(Actual_Total_Load, '""')),
Day_ahead_Total_Load_Forecast = as.numeric(str_remove_all(Day_ahead_Total_Load_Forecast, '""')),
time = as.POSIXct(
str_extract(time_range, "^.*(?= -)"),
format = "%d/%m/%Y %H:%M",
tz = tz
)
) %>%
select(year, time, Area,
load = Actual_Total_Load,
forecast = Day_ahead_Total_Load_Forecast) %>%
arrange(time)
}
years <- 2015:2025
files <- setNames(
paste0("electro", years, ".csv"),
as.character(years)
)
all_data <- bind_rows(
lapply(names(files), function(y) read_one_year_clean(files[[y]], y))
) %>%
arrange(time)
str(all_data)
## tibble [385,728 × 5] (S3: tbl_df/tbl/data.frame)
## $ year : int [1:385728] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ time : POSIXct[1:385728], format: "2015-01-01 00:00:00" "2015-01-01 00:15:00" ...
## $ Area : chr [1:385728] "Germany (DE)" "Germany (DE)" "Germany (DE)" "Germany (DE)" ...
## $ load : num [1:385728] 45274 44870 44964 44894 44442 ...
## $ forecast: num [1:385728] 42955 42412 41901 41355 40710 ...
head(all_data)
## # A tibble: 6 × 5
## year time Area load forecast
## <int> <dttm> <chr> <dbl> <dbl>
## 1 2015 2015-01-01 00:00:00 Germany (DE) 45274. 42955.
## 2 2015 2015-01-01 00:15:00 Germany (DE) 44870. 42412.
## 3 2015 2015-01-01 00:30:00 Germany (DE) 44964. 41901.
## 4 2015 2015-01-01 00:45:00 Germany (DE) 44894. 41355.
## 5 2015 2015-01-01 01:00:00 Germany (DE) 44442. 40710.
## 6 2015 2015-01-01 01:15:00 Germany (DE) 44057. 40204.
sum(is.na(all_data$load))
## [1] 0
analyze_year <- function(df, freq = 96) {
load_ts <- ts(df$load, frequency = freq)
decomp <- stl(load_ts, s.window = "periodic")
adf <- adf.test(decomp$time.series[, "remainder"])
tibble(
statistic = unname(adf$statistic),
p_value = adf$p.value,
lags = adf$parameter
)
}
adf_by_year <- all_data %>%
group_by(year) %>%
group_modify(~ analyze_year(.x, freq = 96)) %>%
ungroup()
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
## Warning in adf.test(decomp$time.series[, "remainder"]): p-value smaller than
## printed p-value
adf_by_year
## # A tibble: 11 × 4
## year statistic p_value lags
## <int> <dbl> <dbl> <dbl>
## 1 2015 -37.6 0.01 32
## 2 2016 -37.8 0.01 32
## 3 2017 -37.7 0.01 32
## 4 2018 -36.7 0.01 32
## 5 2019 -36.3 0.01 32
## 6 2020 -37.6 0.01 32
## 7 2021 -37.4 0.01 32
## 8 2022 -36.6 0.01 32
## 9 2023 -35.7 0.01 32
## 10 2024 -35.6 0.01 32
## 11 2025 -35.3 0.01 32
years <- sort(unique(all_data$year))
#Load plot
plot_load_hourly <- function(df, title = "Electricity load (Germany)") {
df_h <- df %>%
filter(!is.na(load), is.finite(load), !is.na(time)) %>%
mutate(time_hour = floor_date(time, "hour")) %>%
group_by(time_hour) %>%
summarise(load = mean(load, na.rm = TRUE), .groups = "drop")
ggplot(df_h, aes(x = time_hour, y = load)) +
geom_line(linewidth = 0.4, alpha = 0.9) +
scale_x_datetime(date_breaks = "1 month", date_labels = "%b %Y") +
scale_y_continuous(labels = scales::comma) +
labs(
title = title,
subtitle = "Hourly average (smoothed for readability)",
x = NULL,
y = "Load (MW)"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
}
#STL decomposition plot
plot_stl_pretty <- function(df, freq = 96, title = "STL decomposition") {
df <- df %>%
filter(!is.na(load), is.finite(load), !is.na(time)) %>%
arrange(time)
stopifnot(nrow(df) > freq * 2)
ts_load <- ts(df$load, frequency = freq)
decomp <- stl(ts_load, s.window = "periodic")
stl_df <- tibble(
time = df$time,
data = as.numeric(ts_load),
seasonal = as.numeric(decomp$time.series[, "seasonal"]),
trend = as.numeric(decomp$time.series[, "trend"]),
remainder = as.numeric(decomp$time.series[, "remainder"])
)
p1 <- ggplot(stl_df, aes(time, data)) +
geom_line(linewidth = 0.25, alpha = 0.85) +
scale_y_continuous(labels = comma) +
labs(title = title, subtitle = "Observed", x = NULL, y = "MW") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank())
p2 <- ggplot(stl_df, aes(time, seasonal)) +
geom_line(linewidth = 0.25, alpha = 0.85) +
labs(subtitle = "Seasonal", x = NULL, y = NULL) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
p3 <- ggplot(stl_df, aes(time, trend)) +
geom_line(linewidth = 0.35, alpha = 0.9) +
labs(subtitle = "Trend", x = NULL, y = NULL) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
p4 <- ggplot(stl_df, aes(time, remainder)) +
geom_line(linewidth = 0.25, alpha = 0.85) +
labs(subtitle = "Remainder", x = NULL, y = NULL) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
p1 / p2 / p3 / p4 + plot_layout(heights = c(1.2, 1, 1, 1))
}
df_2025 <- all_data %>% filter(year == 2025)
p_load <- plot_load_hourly(df_2025, title = "Germany electricity load – 2025")
p_stl <- plot_stl_pretty(df_2025, freq = 96, title = "Germany electricity load – 2025 (STL)")
print(p_load)

print(p_stl)

# Temperature function
read_era5_t2m_DE <- function(nc_path, tz_out = "Europe/Berlin") {
nc <- ncdf4::nc_open(nc_path)
on.exit(ncdf4::nc_close(nc), add = TRUE)
lat <- ncdf4::ncvar_get(nc, "latitude")
time_raw <- ncdf4::ncvar_get(nc, "valid_time")
time_units <- ncdf4::ncatt_get(nc, "valid_time", "units")$value
origin_str <- sub(".*since\\s+", "", time_units)
origin_str <- sub("\\.0$", "", origin_str)
origin <- as.POSIXct(origin_str, tz = "UTC")
if (grepl("^hours", time_units)) {
time_utc <- origin + lubridate::dhours(time_raw)
} else if (grepl("^seconds", time_units)) {
time_utc <- origin + lubridate::dseconds(time_raw)
} else {
stop("Unknown valid_time units: ", time_units)
}
# temperature
t2m <- ncdf4::ncvar_get(nc, "t2m")
dim_names <- sapply(nc$var[["t2m"]]$dim, `[[`, "name")
# weights by latitude
w_lat <- cos(lat * pi / 180)
w_lat <- w_lat / sum(w_lat)
# average over longitude
if (identical(dim_names, c("longitude", "latitude", "valid_time"))) {
# t2m[lon, lat, time] -> mean over lon -> [lat, time]
mean_lon <- apply(t2m, c(2, 3), mean, na.rm = TRUE)
} else if (identical(dim_names, c("valid_time", "latitude", "longitude"))) {
# t2m[time, lat, lon] -> mean over lon -> [time, lat]
mean_lon <- apply(t2m, c(1, 2), mean, na.rm = TRUE)
} else if (identical(dim_names, c("latitude", "longitude", "valid_time"))) {
# t2m[lat, lon, time] -> mean over lon -> [lat, time]
mean_lon <- apply(t2m, c(1, 3), mean, na.rm = TRUE)
} else {
stop("Unexpected t2m dimension order: ", paste(dim_names, collapse = ", "))
}
mean_lon <- as.matrix(mean_lon)
# weight over latitude
if (nrow(mean_lon) == length(w_lat)) {
# [lat, time]
mean_area <- colSums(sweep(mean_lon, 1, w_lat, `*`), na.rm = TRUE)
} else if (ncol(mean_lon) == length(w_lat)) {
# [time, lat]
mean_area <- rowSums(sweep(mean_lon, 2, w_lat, `*`), na.rm = TRUE)
} else {
stop("Latitude dimension mismatch: nrow=", nrow(mean_lon),
" ncol=", ncol(mean_lon),
" lat_len=", length(w_lat))
}
tibble::tibble(
time = lubridate::with_tz(time_utc, tz_out),
temp_c = as.numeric(mean_area - 273.15)
) %>%
arrange(time)
}
files <- list.files(
pattern = "^Temperature.*\\.nc$",
full.names = TRUE
)
files <- files[order(basename(files))]
temp_all <- purrr::map_dfr(
files,
~ read_era5_t2m_DE(.x) %>%
mutate(
source_file = basename(.x),
year = lubridate::year(time)
)
) %>%
arrange(time) %>%
filter(year >= 2015, year <= 2025)
glimpse(temp_all)
## Rows: 96,382
## Columns: 4
## $ time <dttm> 2015-01-01 01:00:00, 2015-01-01 02:00:00, 2015-01-01 03:0…
## $ temp_c <dbl> 0.6985498, 0.6214059, 0.5340014, 0.4823854, 0.4143815, 0.3…
## $ source_file <chr> "Temperature2015.nc", "Temperature2015.nc", "Temperature20…
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015…
print(table(temp_all$year))
##
## 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
## 8759 8784 8760 8760 8760 8784 8760 8760 8760 8784 8711
summary(temp_all$temp_c)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -12.236 4.317 9.591 10.062 15.533 32.559
# Germany grid map
plot_era5_grid_over_germany <- function(nc_path) {
nc <- ncdf4::nc_open(nc_path)
on.exit(ncdf4::nc_close(nc), add = TRUE)
lat <- ncdf4::ncvar_get(nc, "latitude")
lon <- ncdf4::ncvar_get(nc, "longitude")
grid <- expand.grid(longitude = lon, latitude = lat)
de <- rnaturalearth::ne_countries(country = "Germany", returnclass = "sf")
ggplot() +
geom_sf(data = de, fill = NA, linewidth = 0.6) +
geom_point(data = grid, aes(longitude, latitude), size = 0.15, alpha = 0.35) +
coord_sf(xlim = range(lon), ylim = range(lat), expand = FALSE) +
labs(
title = "ERA5 grid points over Germany",
x = "Longitude", y = "Latitude"
) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor = element_blank())
}
p_map <- plot_era5_grid_over_germany(files[1])
print(p_map)

# Conversion of hourly temp to 15-min
temp_15m <- temp_all %>%
group_by(year) %>%
arrange(time, .by_group = TRUE) %>%
complete(time = seq(min(time), max(time), by = "15 mins")) %>%
arrange(time, .by_group = TRUE) %>%
mutate(temp_c = zoo::na.locf(temp_c, na.rm = FALSE)) %>%
ungroup()
sum(is.na(temp_15m$temp_c))
## [1] 0
glimpse(temp_15m)
## Rows: 385,495
## Columns: 4
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015…
## $ time <dttm> 2015-01-01 01:00:00, 2015-01-01 01:15:00, 2015-01-01 01:3…
## $ temp_c <dbl> 0.6985498, 0.6985498, 0.6985498, 0.6985498, 0.6214059, 0.6…
## $ source_file <chr> "Temperature2015.nc", NA, NA, NA, "Temperature2015.nc", NA…
#Events
years <- seq(year(min(all_data$time, na.rm = TRUE)),
year(max(all_data$time, na.rm = TRUE)))
# Nationwide German public holidays
# Fixed-date nationwide holidays:
# - Jan 1 (New Year)
# - May 1 (Labour Day)
# - Oct 3 (German Unity Day)
# - Dec 25 (1.Christmas Day)
# - Dec 26 (2.Christmas Day)
fixed_holidays <- as.Date(c(
paste0(years, "-01-01"),
paste0(years, "-05-01"),
paste0(years, "-10-03"),
paste0(years, "-12-25"),
paste0(years, "-12-26")
))
# Movable nationwide holidays (based on Easter):
# - Good Friday (Easter - 2)
# - Easter Monday (Easter + 1)
# - Ascension Day (Easter + 39)
# - Whit Monday / Pfingstmontag (Easter + 50)
easter_dates <- as.Date(Easter(years))
movable_holidays <- as.Date(c(
easter_dates - 2, # Good Friday
easter_dates + 1, # Easter Monday
easter_dates + 39, # Ascension Day
easter_dates + 50 # Whit Monday
))
# Final holiday set
holidays_de_nationwide <- unique(c(fixed_holidays, movable_holidays))
#Adding the holidays to the dataset
df <- all_data %>%
mutate(
date = as.Date(time),
# Weekend (Saturday, Sunday)
is_weekend = as.integer(wday(date, week_start = 1) %in% c(6, 7)),
# Nationwide holiday
is_holiday = as.integer(date %in% holidays_de_nationwide),
# Workday = not weekend and not holiday
is_workday = as.integer(is_weekend == 0 & is_holiday == 0),
# Time features
hour = hour(time),
dow = wday(time, week_start = 1), # day of the week
month = month(time),
# Day/Night
is_night = as.integer(hour < 7 | hour >= 22),
is_day = as.integer(!is_night),
# 4-part time of day
time_of_day = case_when(
hour < 6 ~ "night",
hour < 12 ~ "morning",
hour < 18 ~ "day",
TRUE ~ "evening"
),
time_of_day = factor(time_of_day,
levels = c("night", "morning", "day", "evening"))
)
#Sanity checks
df %>% count(is_weekend, is_holiday, is_workday)
## # A tibble: 4 × 4
## is_weekend is_holiday is_workday n
## <int> <int> <int> <int>
## 1 0 0 1 267456
## 2 0 1 0 8064
## 3 1 0 0 108768
## 4 1 1 0 1440
df %>%
group_by(is_night) %>%
summarise(mean_load = mean(load, na.rm = TRUE), .groups = "drop")
## # A tibble: 2 × 2
## is_night mean_load
## <int> <dbl>
## 1 0 60627.
## 2 1 47764.
df %>%
filter(is_holiday == 1) %>%
distinct(date) %>%
arrange(date) %>%
head(30)
## # A tibble: 30 × 1
## date
## <date>
## 1 2015-01-01
## 2 2015-04-03
## 3 2015-04-06
## 4 2015-05-01
## 5 2015-05-14
## 6 2015-05-25
## 7 2015-10-03
## 8 2015-12-25
## 9 2015-12-26
## 10 2016-01-01
## # ℹ 20 more rows
######################################################################################################################################################################################################################################################################################################################################################################################################
#Filter to common period
df_2015_2025 <- df %>%
filter(year >= 2015, year <= 2025)
temp_2015_2025 <- temp_15m %>%
filter(year >= 2015, year <= 2025)
#Filling the data
temp_2015_2025_clean <- temp_2015_2025 %>%
select(time, temp_c) %>%
arrange(time) %>%
distinct(time, .keep_all = TRUE) %>%
mutate(
temp_c = zoo::na.locf(temp_c, na.rm = FALSE),
temp_c = zoo::na.locf(temp_c, fromLast = TRUE, na.rm = FALSE)
)
#Merging the data
df_model_2015_2025 <- df_2015_2025 %>%
left_join(temp_2015_2025_clean, by = "time") %>%
mutate(
log_load = log(load),
date = as.Date(with_tz(time, "Europe/Berlin"))
) %>%
arrange(time)
cat("Rows:", nrow(df_model_2015_2025), "\n")
## Rows: 385728
cat("Missing temp_c:", sum(is.na(df_model_2015_2025$temp_c)), "\n")
## Missing temp_c: 233
range(df_model_2015_2025$time)
## [1] "2015-01-01 00:00:00 CET" "2025-12-31 23:45:00 CET"
table(df_model_2015_2025$year)
##
## 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
## 35040 35136 35040 35040 35040 35136 35040 35040 35040 35136 35040
######################################################################################################################################################################################################################################################################################################################################################################################################
#Check for correlation
vars_for_cor <- df_model_2015_2025 %>%
select(
log_load,
temp_c,
is_weekend,
is_holiday,
is_night
)
cor(vars_for_cor, use = "complete.obs")
## log_load temp_c is_weekend is_holiday is_night
## log_load 1.0000000 -0.153321837 -4.592725e-01 -1.730015e-01 -6.293411e-01
## temp_c -0.1533218 1.000000000 -8.893770e-04 -3.554656e-02 -1.934827e-01
## is_weekend -0.4592725 -0.000889377 1.000000e+00 -4.727218e-02 -2.074786e-06
## is_holiday -0.1730015 -0.035546561 -4.727218e-02 1.000000e+00 2.183079e-05
## is_night -0.6293411 -0.193482680 -2.074786e-06 2.183079e-05 1.000000e+00
# log_load vs night dummy
boxplot(log_load ~ is_night,
data = df_model_2015_2025,
names = c("Day", "Night"),
main = "Day vs Night electricity consumption",
ylab = "log(load)")

# log_load vs weekend
boxplot(log_load ~ is_weekend,
data = df_model_2015_2025,
names = c("Weekday", "Weekend"),
main = "Weekday vs Weekend electricity consumption",
ylab = "log(load)")

mm1 <- lm(
log_load ~ temp_c + is_night + is_weekend + is_holiday,
data = df_model_2015_2025
)
summary(mm1)
##
## Call:
## lm(formula = log_load ~ temp_c + is_night + is_weekend + is_holiday,
## data = df_model_2015_2025)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46483 -0.06083 0.00513 0.06522 0.49381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.115e+01 3.274e-04 34041.7 <2e-16 ***
## temp_c -7.457e-03 2.174e-05 -342.9 <2e-16 ***
## is_night -2.590e-01 3.231e-04 -801.4 <2e-16 ***
## is_weekend -1.897e-01 3.400e-04 -558.0 <2e-16 ***
## is_holiday -2.422e-01 9.915e-04 -244.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09529 on 385490 degrees of freedom
## (233 Beobachtungen als fehlend gelöscht)
## Multiple R-squared: 0.728, Adjusted R-squared: 0.728
## F-statistic: 2.579e+05 on 4 and 385490 DF, p-value: < 2.2e-16
# A1) Heating/Cooling degree variables
T_base <- 18
df_model_2015_2025 <- df_model_2015_2025 %>%
mutate(
HDD = pmax(0, T_base - temp_c),
CDD = pmax(0, temp_c - T_base)
)
# Sanity check
summary(df_model_2015_2025$HDD)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 2.466 8.408 8.459 13.683 30.236 233
summary(df_model_2015_2025$CDD)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 0.000 0.522 0.000 14.559 233
# A2) Lags
df_model_2015_2025 <- df_model_2015_2025 %>%
arrange(time) %>%
mutate(
lag_1 = dplyr::lag(log_load, 1),
lag_96 = dplyr::lag(log_load, 96),
lag_672 = dplyr::lag(log_load, 672)
)
colSums(is.na(df_model_2015_2025[, c("lag_1","lag_96","lag_672")]))
## lag_1 lag_96 lag_672
## 1 96 672
#Seasonal Fourier Features: sin/cos (day/week/year)
df_model_2015_2025 <- df_model_2015_2025 %>%
mutate(
time_local = with_tz(time, "Europe/Berlin"),
#15-min slot within day
slot_day = hour(time_local) * 4 + minute(time_local) %/% 15 + 1,
sin_day = sin(2 * pi * slot_day / 96),
cos_day = cos(2 * pi * slot_day / 96),
#week cycle
dow = wday(time_local, week_start = 1),
sin_week = sin(2 * pi * dow / 7),
cos_week = cos(2 * pi * dow / 7),
#year cycle
doy = yday(time_local),
sin_year = sin(2 * pi * doy / 365),
cos_year = cos(2 * pi * doy / 365)
) %>%
select(-time_local, -slot_day)
summary(select(
df_model_2015_2025,
sin_day, cos_day,
sin_week, cos_week,
sin_year, cos_year
))
## sin_day cos_day sin_week cos_week
## Min. :-1.0000 Min. :-1.0000 Min. :-0.9749 Min. :-0.9010
## 1st Qu.:-0.7071 1st Qu.:-0.7071 1st Qu.:-0.7818 1st Qu.:-0.9010
## Median : 0.0000 Median : 0.0000 Median : 0.0000 Median :-0.2225
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7071 3rd Qu.: 0.7071 3rd Qu.: 0.7818 3rd Qu.: 0.6235
## Max. : 1.0000 Max. : 1.0000 Max. : 0.9749 Max. : 1.0000
## sin_year cos_year
## Min. :-0.9999907 Min. :-0.9999630
## 1st Qu.:-0.7055836 1st Qu.:-0.7086267
## Median : 0.0000000 Median : 0.0043035
## Mean :-0.0002024 Mean : 0.0007908
## 3rd Qu.: 0.7055836 3rd Qu.: 0.7025275
## Max. : 0.9999907 Max. : 1.0000000
m_seasonal_linear <- lm(
log_load ~
temp_c +
is_night +
is_weekend +
is_holiday,
data = df_model_2015_2025
)
summary(m_seasonal_linear)
##
## Call:
## lm(formula = log_load ~ temp_c + is_night + is_weekend + is_holiday,
## data = df_model_2015_2025)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46483 -0.06083 0.00513 0.06522 0.49381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.115e+01 3.274e-04 34041.7 <2e-16 ***
## temp_c -7.457e-03 2.174e-05 -342.9 <2e-16 ***
## is_night -2.590e-01 3.231e-04 -801.4 <2e-16 ***
## is_weekend -1.897e-01 3.400e-04 -558.0 <2e-16 ***
## is_holiday -2.422e-01 9.915e-04 -244.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09529 on 385490 degrees of freedom
## (233 Beobachtungen als fehlend gelöscht)
## Multiple R-squared: 0.728, Adjusted R-squared: 0.728
## F-statistic: 2.579e+05 on 4 and 385490 DF, p-value: < 2.2e-16
par(mfrow = c(1,2))
plot(
fitted(m_seasonal_linear),
resid(m_seasonal_linear),
main = "Seasonal linear: Residuals vs Fitted",
xlab = "Fitted", ylab = "Residuals"
)
hist(
resid(m_seasonal_linear),
breaks = 50,
main = "Seasonal linear: Residuals distribution",
xlab = "Residuals"
)

m_full <- lm(
log_load ~
HDD + CDD +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year +
lag_1 + lag_96 + lag_672,
data = df_model_2015_2025
)
summary(m_full)
##
## Call:
## lm(formula = log_load ~ HDD + CDD + is_night + is_weekend + is_holiday +
## sin_day + cos_day + sin_week + cos_week + sin_year + cos_year +
## lag_1 + lag_96 + lag_672, data = df_model_2015_2025)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33150 -0.00654 -0.00162 0.00471 0.30760
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.165e-02 2.262e-03 40.51 <2e-16 ***
## HDD 2.291e-04 6.031e-06 38.00 <2e-16 ***
## CDD 1.949e-04 1.181e-05 16.50 <2e-16 ***
## is_night -4.024e-03 7.161e-05 -56.19 <2e-16 ***
## is_weekend -3.506e-03 8.288e-05 -42.30 <2e-16 ***
## is_holiday -6.808e-03 1.247e-04 -54.60 <2e-16 ***
## sin_day 8.299e-03 3.964e-05 209.39 <2e-16 ***
## cos_day -5.871e-03 4.171e-05 -140.74 <2e-16 ***
## sin_week 7.182e-04 2.805e-05 25.61 <2e-16 ***
## cos_week 8.395e-04 4.393e-05 19.11 <2e-16 ***
## sin_year -5.078e-04 2.854e-05 -17.79 <2e-16 ***
## cos_year -7.879e-04 5.016e-05 -15.71 <2e-16 ***
## lag_1 9.667e-01 3.243e-04 2980.52 <2e-16 ***
## lag_96 6.237e-03 2.370e-04 26.32 <2e-16 ***
## lag_672 1.875e-02 2.583e-04 72.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01023 on 384812 degrees of freedom
## (901 Beobachtungen als fehlend gelöscht)
## Multiple R-squared: 0.9969, Adjusted R-squared: 0.9969
## F-statistic: 8.739e+06 on 14 and 384812 DF, p-value: < 2.2e-16
summary(m_seasonal_linear)$r.squared
## [1] 0.72798
summary(m_full)$r.squared
## [1] 0.9968647
# BACKTEST
df_bt <- df_model_2015_2025 %>%
arrange(time) %>%
filter(!is.na(log_load), !is.na(temp_c))
predict_one_day <- function(train_df, test_df, formula) {
fit <- lm(formula, data = train_df)
pred_log <- predict(fit, newdata = test_df)
test_df %>%
mutate(pred_log = pred_log,
pred = exp(pred_log))
}
backtest_day_ahead <- function(df, dates, formula) {
res <- vector("list", length(dates))
for (i in seq_along(dates)) {
d <- dates[i]
train_df <- df %>% filter(date < d)
test_df <- df %>% filter(date == d)
if (nrow(train_df) < 365 * 96 || nrow(test_df) != 96) {
res[[i]] <- NULL
next
}
pred_df <- predict_one_day(train_df, test_df, formula)
res[[i]] <- pred_df %>%
select(time, load, pred) %>%
mutate(date = d)
}
bind_rows(res)
}
f_seasonal_linear <- log_load ~
temp_c +
is_night +
is_weekend +
is_holiday
f_full <- log_load ~
HDD + CDD +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year +
lag_1 + lag_96 + lag_672
f_full_no_lag1 <- log_load ~
HDD + CDD +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year +
lag_96 + lag_672
dates_bt <- seq(as.Date("2020-01-01"),
as.Date("2020-03-31"),
by = "day")
bt_seasonal <- backtest_day_ahead(df_bt, dates_bt, f_seasonal_linear)
bt_full <- backtest_day_ahead(df_bt, dates_bt, f_full)
bt_full_no_lag1 <- backtest_day_ahead(df_bt, dates_bt, f_full_no_lag1)
calc_metrics <- function(df) {
y_true <- NULL
y_hat <- NULL
if ("load" %in% names(df)) y_true <- df$load
if ("pred_load" %in% names(df)) y_hat <- df$pred_load
if (is.null(y_hat) && "pred" %in% names(df)) y_hat <- df$pred
if (is.null(y_true) || is.null(y_hat)) {
return(tibble(note = "Metrics cannot be computed"))
}
if (!any(!is.na(y_true))) {
return(tibble(note = "No actual load available"))
}
tibble(
n = sum(!is.na(y_true) & !is.na(y_hat)),
rmse = sqrt(mean((y_hat - y_true)^2, na.rm = TRUE)),
mae = mean(abs(y_hat - y_true), na.rm = TRUE),
mape = mean(abs(y_hat - y_true) / pmax(y_true, 1e-6), na.rm = TRUE) * 100
)
}
metrics_seasonal <- calc_metrics(bt_seasonal)
metrics_full <- calc_metrics(bt_full)
metrics_full_no_lag1 <- calc_metrics(bt_full_no_lag1)
metrics_full_no_lag1
## # A tibble: 1 × 4
## n rmse mae mape
## <int> <dbl> <dbl> <dbl>
## 1 8736 3041. 2246. 3.81
metrics_seasonal
## # A tibble: 1 × 4
## n rmse mae mape
## <int> <dbl> <dbl> <dbl>
## 1 8736 5087. 4066. 6.95
metrics_full
## # A tibble: 1 × 4
## n rmse mae mape
## <int> <dbl> <dbl> <dbl>
## 1 8736 617. 449. 0.738
d_show <- as.Date("2020-01-15")
ggplot() +
geom_line(data = bt_seasonal %>% filter(date == d_show),
aes(time, load), colour = "black") +
geom_line(data = bt_seasonal %>% filter(date == d_show),
aes(time, pred), colour = "red", linetype = "dashed") +
geom_line(data = bt_full %>% filter(date == d_show),
aes(time, pred), colour = "blue", linetype = "dashed") +
geom_line(data = bt_full_no_lag1 %>% filter(date == d_show),
aes(time, pred), colour = "darkgreen", linetype = "dashed") +
labs(
title = "Day-ahead forecast comparison",
subtitle = "Black=actual, Red=seasonal linear, Blue=full (with lag_1), Green=full (no lag_1)",
y = "Load (MW)", x = NULL
) +
theme_minimal()

#Shiny
# Safety
assert_has_cols <- function(df, cols, df_name = "df") {
miss <- setdiff(cols, names(df))
if (length(miss) > 0) stop(df_name, " is missing columns: ", paste(miss, collapse = ", "))
invisible(TRUE)
}
# Temperature
make_temp_climatology_15m <- function(train_df, tz = "Europe/Berlin") {
assert_has_cols(train_df, c("time", "temp_c"), "train_df")
train_df %>%
filter(!is.na(temp_c)) %>%
mutate(
time_local = with_tz(time, tz),
doy = yday(time_local),
slot15 = hour(time_local) * 4 + minute(time_local) %/% 15 + 1
) %>%
group_by(doy, slot15) %>%
summarise(temp_c_clim = mean(temp_c), .groups = "drop")
}
add_temp_by_scenario <- function(future_df, full_df, train_df,
scenario = c("clim_15m", "persistence"),
tz = "Europe/Berlin") {
scenario <- match.arg(scenario)
assert_has_cols(future_df, c("time"), "future_df")
assert_has_cols(full_df, c("time","temp_c"), "full_df")
assert_has_cols(train_df, c("time","temp_c"), "train_df")
if (scenario == "clim_15m") {
clim <- make_temp_climatology_15m(train_df, tz = tz)
out <- future_df %>%
mutate(
time_local = with_tz(time, tz),
doy = yday(time_local),
slot15 = hour(time_local) * 4 + minute(time_local) %/% 15 + 1
) %>%
left_join(clim, by = c("doy","slot15")) %>%
transmute(time = time, temp_c = temp_c_clim)
} else { # persistence
tmp <- full_df %>%
arrange(time) %>%
mutate(temp_c_lag96 = dplyr::lag(temp_c, 96)) %>%
select(time, temp_c_lag96)
out <- future_df %>%
left_join(tmp, by = "time") %>%
transmute(time = time, temp_c = temp_c_lag96)
}
out %>%
arrange(time) %>%
mutate(
temp_c = zoo::na.locf(temp_c, na.rm = FALSE),
temp_c = zoo::na.locf(temp_c, fromLast = TRUE, na.rm = FALSE)
)
}
# 2) Parameters
make_future_features <- function(future_df, holidays_vec,
tz = "Europe/Berlin", T_base = 18) {
assert_has_cols(future_df, c("time","temp_c"), "future_df")
future_df %>%
arrange(time) %>%
mutate(
time_local = with_tz(time, tz),
date = as.Date(time_local),
is_weekend = as.integer(wday(date, week_start = 1) %in% c(6,7)),
is_holiday = as.integer(date %in% holidays_vec),
is_night = as.integer(hour(time_local) < 7 | hour(time_local) >= 22),
HDD = pmax(0, T_base - temp_c),
CDD = pmax(0, temp_c - T_base),
slot_day = hour(time_local) * 4 + minute(time_local) %/% 15 + 1,
sin_day = sin(2*pi*slot_day/96),
cos_day = cos(2*pi*slot_day/96),
dow = wday(time_local, week_start = 1),
sin_week = sin(2*pi*dow/7),
cos_week = cos(2*pi*dow/7),
doy = yday(time_local),
sin_year = sin(2*pi*doy/365),
cos_year = cos(2*pi*doy/365)
) %>%
select(-time_local, -slot_day)
}
# 3) Models
get_model_formula <- function(model_type = c("seasonal", "full"),
use_lag1 = FALSE) {
model_type <- match.arg(model_type)
if (model_type == "seasonal") {
fml <- log_load ~
temp_c +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year
}
if (model_type == "full") {
if (use_lag1) {
fml <- log_load ~
HDD + CDD +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year +
lag_1 + lag_96 + lag_672
} else {
fml <- log_load ~
HDD + CDD +
is_night + is_weekend + is_holiday +
sin_day + cos_day +
sin_week + cos_week +
sin_year + cos_year +
lag_96 + lag_672
}
}
fml
}
# 4) Day-ahead forecasting
predict_day_ahead_real <- function(full_df, day_date,
model_type = c("seasonal","full"),
temp_scenario = c("clim_15m","persistence"),
holidays_vec,
tz = "Europe/Berlin",
T_base = 18,
use_lag1 = FALSE,
min_train_days = 365) {
model_type <- match.arg(model_type)
temp_scenario <- match.arg(temp_scenario)
assert_has_cols(full_df, c("time","load","log_load","temp_c"), "full_df")
cutoff <- as.POSIXct(paste0(as.Date(day_date), " 00:00:00"), tz = tz)
train_df <- full_df %>% filter(time < cutoff)
if (nrow(train_df) < min_train_days * 96) {
stop("Not enough training history before ", as.character(day_date),
". Need at least ", min_train_days, " days.")
}
future_times <- full_df %>%
mutate(date_local = as.Date(with_tz(time, tz))) %>%
filter(date_local == as.Date(day_date)) %>%
arrange(time) %>%
pull(time)
if (length(future_times) == 0) stop("No timestamps found for selected date.")
future_df <- tibble(time = future_times)
future_df <- add_temp_by_scenario(
future_df = future_df,
full_df = full_df,
train_df = train_df,
scenario = temp_scenario,
tz = tz
)
future_df <- make_future_features(
future_df,
holidays_vec = holidays_vec,
tz = tz,
T_base = T_base
)
lag_map <- full_df %>%
arrange(time) %>%
mutate(
lag_1 = dplyr::lag(log_load, 1),
lag_96 = dplyr::lag(log_load, 96),
lag_672 = dplyr::lag(log_load, 672)
) %>%
select(time, lag_1, lag_96, lag_672)
future_df <- future_df %>% left_join(lag_map, by = "time")
fml <- get_model_formula(model_type = model_type, use_lag1 = use_lag1)
needed <- setdiff(all.vars(fml), "log_load")
assert_has_cols(train_df, c("log_load", needed), "train_df")
train_fit <- train_df %>% select(log_load, all_of(needed)) %>% drop_na()
model <- lm(fml, data = train_fit)
pred_log <- predict(model, newdata = future_df)
out <- future_df %>%
mutate(
pred_log_load = as.numeric(pred_log),
pred_load = exp(pred_log_load)
)
actual_df <- full_df %>%
mutate(date_local = as.Date(with_tz(time, tz))) %>%
filter(date_local == as.Date(day_date)) %>%
select(time, load)
out %>%
left_join(actual_df, by = "time") %>%
mutate(
date = as.Date(day_date),
model_type = model_type,
temp_scenario = temp_scenario
)
}
# 5) Day-ahead forecasting for 7 days
predict_week_real <- function(full_df, start_date,
model_type = c("seasonal","full"),
temp_scenario = c("clim_15m","persistence"),
holidays_vec,
tz = "Europe/Berlin",
T_base = 18,
use_lag1 = FALSE,
min_train_days = 365,
horizon_days = 7) {
model_type <- match.arg(model_type)
temp_scenario <- match.arg(temp_scenario)
days <- seq(as.Date(start_date), by = "day", length.out = horizon_days)
res <- lapply(days, function(d) {
predict_day_ahead_real(
full_df = full_df,
day_date = d,
model_type = model_type,
temp_scenario = temp_scenario,
holidays_vec = holidays_vec,
tz = tz,
T_base = T_base,
use_lag1 = use_lag1,
min_train_days = min_train_days
)
})
bind_rows(res)
}
calc_metrics <- function(df) {
if (!any(!is.na(df$load))) {
return(tibble(note = "No actual load available"))
}
tibble(
n = sum(!is.na(df$load) & !is.na(df$pred_load)),
rmse = sqrt(mean((df$pred_load - df$load)^2, na.rm = TRUE)),
mae = mean(abs(df$pred_load - df$load), na.rm = TRUE),
mape = mean(abs(df$pred_load - df$load) / pmax(df$load, 1e-6), na.rm = TRUE) * 100
)
}
# 6) Data
TZ_APP <- "Europe/Berlin"
df_bt <- df_model_2015_2025 %>%
filter(!is.na(temp_c), !is.na(load), !is.na(log_load)) %>%
arrange(time)
available_dates <- df_bt %>%
mutate(date_local = as.Date(with_tz(time, TZ_APP))) %>%
distinct(date_local) %>%
arrange(date_local) %>%
pull(date_local)
max_week_start <- max(available_dates) - 6
# 7) UI: pages 1-3 (Day-ahead forecasting, Day-ahead forecasting for 7 days)
ui <- fluidPage(
titlePanel("Real forecast (no leakage)"),
tabsetPanel(
tabPanel(
"1) Day-ahead (1 day)",
sidebarLayout(
sidebarPanel(
helpText("The forecast for the selected day is built only from the past"),
checkboxGroupInput(
"d_models",
"Models to show",
choices = c("seasonal", "full"),
selected = c("seasonal", "full")
),
selectInput("d_temp", "Temperature scenario",
choices = c("clim_15m", "persistence"),
selected = "clim_15m"),
dateInput("d_day", "Forecast day",
value = tail(available_dates, 1),
min = min(available_dates),
max = max(available_dates)),
checkboxInput("d_show_actual", "Overlay actual (evaluation)", TRUE),
checkboxInput("d_show_points", "Show points", FALSE),
tags$hr(),
checkboxInput("d_use_lag1", "Use lag_1 (full only; usually OFF day-ahead)", FALSE)
),
mainPanel(
uiOutput("d_status"),
plotOutput("d_plot", height = 450),
tableOutput("d_metrics")
)
)
),
tabPanel(
"2) Weekly (seasonal)",
sidebarLayout(
sidebarPanel(
helpText("Day-ahead forecasting for 7 days"),
strong("Model: seasonal (fixed)"),
selectInput("s_temp", "Temperature scenario", choices = c("clim_15m", "persistence"), selected = "clim_15m"),
dateInput("s_start", "Week start date",
value = min(max_week_start, tail(available_dates, 7)[1]),
min = min(available_dates),
max = max_week_start),
checkboxInput("s_show_actual", "Overlay actual (evaluation)", TRUE),
checkboxInput("s_show_points", "Show points", FALSE)
),
mainPanel(
uiOutput("s_status"),
plotOutput("s_plot", height = 450),
tableOutput("s_metrics")
)
)
),
tabPanel(
"3) Weekly (full)",
sidebarLayout(
sidebarPanel(
helpText("Day-ahead forecasting for 7 days"),
strong("Model: full (fixed)"),
selectInput("f_temp", "Temperature scenario", choices = c("clim_15m", "persistence"), selected = "clim_15m"),
dateInput("f_start", "Week start date",
value = min(max_week_start, tail(available_dates, 7)[1]),
min = min(available_dates),
max = max_week_start),
checkboxInput("f_show_actual", "Overlay actual (evaluation)", TRUE),
checkboxInput("f_show_points", "Show points", FALSE),
tags$hr(),
checkboxInput("f_use_lag1", "Use lag_1 (usually OFF for stability)", FALSE)
),
mainPanel(
uiOutput("f_status"),
plotOutput("f_plot", height = 450),
tableOutput("f_metrics")
)
)
)
)
)
# 8) Server
server <- function(input, output, session) {
# TAB 1: 1 day
day_fc_all <- reactive({
req(input$d_temp, input$d_day)
models_sel <- input$d_models
if (is.null(models_sel) || length(models_sel) == 0) {
return(structure(simpleError("Select at least one model."), class = "error"))
}
res_list <- lapply(models_sel, function(m) {
tryCatch(
predict_day_ahead_real(
full_df = df_bt,
day_date = as.Date(input$d_day),
model_type = m,
temp_scenario = input$d_temp,
holidays_vec = holidays_de_nationwide,
tz = TZ_APP,
T_base = 18,
use_lag1 = if (m == "full") isTRUE(input$d_use_lag1) else FALSE,
min_train_days = 365
),
error = function(e) e
)
})
first_err <- Filter(function(x) inherits(x, "error"), res_list)
if (length(first_err) > 0) return(first_err[[1]])
bind_rows(res_list)
})
output$d_status <- renderUI({
obj <- day_fc_all()
if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj),
" | models: ", paste(unique(obj$model_type), collapse = ", "),
" | temp: ", input$d_temp,
" | date: ", as.character(input$d_day)))
})
output$d_plot <- renderPlot({
obj <- day_fc_all()
validate(need(!inherits(obj, "error"), "No forecast due to error"))
dfp <- obj %>% arrange(time)
actual <- dfp %>%
select(time, load) %>%
distinct()
p <- ggplot() +
theme_minimal() +
labs(
x = NULL, y = "MW",
title = paste("Day-ahead forecast:", input$d_temp),
subtitle = paste("Day:", as.character(input$d_day))
)
if (isTRUE(input$d_show_actual) && any(!is.na(actual$load))) {
p <- p + geom_line(data = actual, aes(x = time, y = load), na.rm = TRUE)
}
p <- p +
geom_line(
data = dfp,
aes(x = time, y = pred_load, colour = model_type),
linetype = "dashed",
na.rm = TRUE
)
if (isTRUE(input$d_show_points)) {
p <- p +
geom_point(
data = dfp,
aes(x = time, y = pred_load, colour = model_type),
size = 0.7,
na.rm = TRUE
)
}
p
})
output$d_metrics <- renderTable({
obj <- day_fc_all()
if (inherits(obj, "error")) return(NULL)
by_model <- obj %>%
mutate(date = as.Date(date)) %>%
group_by(model_type, temp_scenario, date) %>%
group_modify(~ calc_metrics(.x)) %>%
ungroup() %>%
arrange(model_type) %>%
mutate(date = format(date, "%Y-%m-%d"))
by_model
})
# TAB 2: weekly seasonal
week_seasonal <- reactive({
req(input$s_temp, input$s_start)
tryCatch(
predict_week_real(
full_df = df_bt,
start_date = as.Date(input$s_start),
model_type = "seasonal",
temp_scenario = input$s_temp,
holidays_vec = holidays_de_nationwide,
tz = TZ_APP,
T_base = 18,
use_lag1 = FALSE,
min_train_days = 365,
horizon_days = 7
),
error = function(e) e
)
})
output$s_status <- renderUI({
obj <- week_seasonal()
if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj), " (7 days × 96)"))
})
output$s_plot <- renderPlot({
obj <- week_seasonal()
validate(need(!inherits(obj, "error"), "No forecast due to error"))
dfp <- obj %>% arrange(time)
p <- ggplot(dfp, aes(x = time)) +
geom_line(aes(y = pred_load), linetype = "dashed", na.rm = TRUE) +
theme_minimal() +
labs(
x = NULL, y = "MW",
title = paste("Weekly forecast: seasonal /", input$s_temp),
subtitle = paste("Week start:", as.character(input$s_start))
)
if (isTRUE(input$s_show_actual) && any(!is.na(dfp$load))) p <- p + geom_line(aes(y = load), na.rm = TRUE)
if (isTRUE(input$s_show_points)) p <- p + geom_point(aes(y = pred_load), size = 0.5, na.rm = TRUE)
p
})
output$s_metrics <- renderTable({
obj <- week_seasonal()
if (inherits(obj, "error")) return(NULL)
by_day <- obj %>%
group_by(date) %>%
group_modify(~ calc_metrics(.x)) %>%
ungroup() %>%
mutate(date = as.Date(date))
overall <- calc_metrics(obj) %>% mutate(note = "overall_week")
bind_rows(
by_day %>% mutate(scope = "day") %>% relocate(scope, .before = 1),
overall %>% mutate(scope = "week") %>% select(scope, everything())
)%>%
mutate(date = ifelse(is.na(date), "", as.character(as.Date(date))))
})
#TAB 3: weekly full
week_full <- reactive({
req(input$f_temp, input$f_start)
tryCatch(
predict_week_real(
full_df = df_bt,
start_date = as.Date(input$f_start),
model_type = "full",
temp_scenario = input$f_temp,
holidays_vec = holidays_de_nationwide,
tz = TZ_APP,
T_base = 18,
use_lag1 = isTRUE(input$f_use_lag1),
min_train_days = 365,
horizon_days = 7
),
error = function(e) e
)
})
output$f_status <- renderUI({
obj <- week_full()
if (inherits(obj, "error")) return(tags$div(style="color:#b30000;", paste("Error:", obj$message)))
tags$div(style="color:#666;", paste0("OK. Rows: ", nrow(obj), " (7 days × 96)"))
})
output$f_plot <- renderPlot({
obj <- week_full()
validate(need(!inherits(obj, "error"), "No forecast due to error"))
dfp <- obj %>% arrange(time)
p <- ggplot(dfp, aes(x = time)) +
geom_line(aes(y = pred_load), linetype = "dashed", na.rm = TRUE) +
theme_minimal() +
labs(
x = NULL, y = "MW",
title = paste("Weekly forecast: full /", input$f_temp),
subtitle = paste("Week start:", as.character(input$f_start),
"| lag_1:", ifelse(isTRUE(input$f_use_lag1), "ON", "OFF"))
)
if (isTRUE(input$f_show_actual) && any(!is.na(dfp$load))) p <- p + geom_line(aes(y = load), na.rm = TRUE)
if (isTRUE(input$f_show_points)) p <- p + geom_point(aes(y = pred_load), size = 0.5, na.rm = TRUE)
p
})
output$f_metrics <- renderTable({
obj <- week_full()
if (inherits(obj, "error")) return(NULL)
by_day <- obj %>%
group_by(date) %>%
group_modify(~ calc_metrics(.x)) %>%
ungroup()%>%
mutate(date = as.Date(date))
overall <- calc_metrics(obj) %>% mutate(note = "overall_week")
bind_rows(
by_day %>% mutate(scope = "day") %>% relocate(scope, .before = 1),
overall %>% mutate(scope = "week") %>% select(scope, everything())
)%>%
mutate(date = ifelse(is.na(date), "", as.character(as.Date(date))))
})
}
shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents