setwd("C:/Users/Vibha/OneDrive/Desktop")
getwd()
## [1] "C:/Users/Vibha/OneDrive/Desktop"
#' Title : Q2 - NHH Profile Class 1 Margin Trend Analysis
#' Author: Vibhash
#' Date : 2026-04
#'
#' Margin (p/kWh) is set at DateCosted. We treat DateCosted as the time axis
#' and run a full time-series EDA — plots are rendered interactively to the
#' RStudio 'Plots' pane (nothing is saved to disk):
#' 1. Descriptive stats
#' 2. Raw scatter
#' 3. Monthly aggregation (mean, median, count, volume-weighted mean)
#' 4. Rolling 3/6-month mean
#' 5. Month-of-year seasonality + YoY overlay
#' 6. STL decomposition
#' 7. Stationarity (ADF) + ACF/PACF
#' 8. Segmentation (Flexi, Route, Term bucket)
#' 9. OLS linear trend fit"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(lubridate)
library(ggplot2)
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.2.3
library(fpp2) # ts(), autoplot(), stl forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ──────────────────────────────────────────── fpp2 2.5.1 ──
## ✔ forecast 8.21 ✔ expsmooth 2.3
## ✔ fma 2.5
## Warning: package 'fma' was built under R version 4.2.3
## Warning: package 'expsmooth' was built under R version 4.2.3
##
library(tseries) # adf.test()
library(zoo) # rollmean()
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
to_date <- function(x) {
if (inherits(x, "Date")) return(as.Date(x))
if (inherits(x, "POSIXct") || inherits(x, "POSIXt")) return(as.Date(x))
n <- suppressWarnings(as.numeric(x))
if (all(is.na(n))) return(as.Date(x))
as.Date(n, origin = "1899-12-30")
}
file_date <- format(Sys.Date(), "%Y%m%d")
file_name <- "SSEEX1.xlsx"
sheet_name <- "Sheet1"
MARKET_F <- "NHH"
PROFILE_F <- 1
out_dir <- file.path(getwd(), "q2_plots")
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
my_theme <- theme(
plot.background = element_rect(fill = "white", color = "white"),
panel.background = element_rect(fill = "white", color = "gray60"),
panel.grid.major = element_line(color = "gray90"),
panel.grid.minor = element_blank(),
legend.background = element_rect(fill = "white", color = "white"),
legend.key = element_rect(fill = "white", colour = "white"),
plot.title = element_text(face = "bold")
)
raw <- read_excel(file_name, sheet = sheet_name)
names(raw) <- trimws(names(raw))
rdb <- raw %>%
slice(-1) %>% # drop description row
mutate(
Profile = as.numeric(Profile),
`Annual Consumption` = as.numeric(`Annual Consumption`),
Margin = as.numeric(Margin),
Term = as.numeric(Term),
Start_Date = as.Date(as.numeric(Start_Date), origin = "1899-12-30"),
End_Date = as.Date(as.numeric(End_Date), origin = "1899-12-30"),
DateCosted = as.Date(as.numeric(DateCosted), origin = "1899-12-30"),
Market = toupper(trimws(as.character(Market)))
)
pc1 <- rdb %>%
filter(Market == MARKET_F, Profile == PROFILE_F) %>%
filter(!is.na(Margin), !is.na(DateCosted)) %>%
arrange(DateCosted)
label_txt <- paste0(MARKET_F, " - Profile Class ", PROFILE_F)
cat("=================================================================\n")
## =================================================================
cat("Descriptive stats -", label_txt, "\n")
## Descriptive stats - NHH - Profile Class 1
cat("=================================================================\n")
## =================================================================
cat(sprintf("Rows : %d\n", nrow(pc1)))
## Rows : 306
cat(sprintf("DateCosted range : %s -> %s\n",
min(pc1$DateCosted), max(pc1$DateCosted)))
## DateCosted range : 2013-07-29 -> 2018-03-27
cat(sprintf("Margin mean / median : %.4f / %.4f\n",
mean(pc1$Margin), median(pc1$Margin)))
## Margin mean / median : 0.2331 / 0.1770
cat(sprintf("Margin std / min / max : %.4f / %.4f / %.4f\n",
sd(pc1$Margin), min(pc1$Margin), max(pc1$Margin)))
## Margin std / min / max : 0.3108 / -1.2745 / 1.7342
# ==== SECTION 7 — Raw Scatter ======================================
p1 <- ggplot(pc1, aes(DateCosted, Margin)) +
geom_point(alpha = 0.35, size = 1.6, color = "steelblue") +
labs(title = paste("Raw margin vs DateCosted -", label_txt),
x = "DateCosted", y = "Margin (p/kWh)") +
my_theme
p1 <- ggplot(pc1, aes(DateCosted, Margin)) +
geom_point(alpha = 0.35, size = 1.6, color = "steelblue") +
labs(title = paste("Raw margin vs DateCosted -", label_txt),
x = "DateCosted", y = "Margin (p/kWh)") +
my_theme
print(p1)

monthly <- pc1 %>%
mutate(month = floor_date(DateCosted, "month")) %>%
group_by(month) %>%
summarise(
mean_margin = mean(Margin),
median_margin = median(Margin),
count = n(),
vol_weighted = sum(Margin * `Annual Consumption`) /
sum(`Annual Consumption`),
.groups = "drop"
)
full_months <- tibble(month = seq(min(monthly$month),
max(monthly$month),
by = "month"))
monthly <- full_months %>% left_join(monthly, by = "month")
p2 <- ggplot(monthly, aes(month)) +
geom_line (aes(y = mean_margin, color = "Mean")) +
geom_point(aes(y = mean_margin, color = "Mean")) +
geom_line (aes(y = median_margin, color = "Median"), alpha = 0.7) +
geom_line (aes(y = vol_weighted, color = "Vol-weighted"), alpha = 0.7) +
scale_color_manual(values = c(Mean = "steelblue",
Median = "darkorange",
`Vol-weighted` = "forestgreen")) +
labs(title = paste("Monthly margin -", label_txt),
x = "Month", y = "Margin (p/kWh)", color = "") +
my_theme
print(p2)
## Warning: Removed 10 rows containing missing values (`geom_point()`).

p2b <- ggplot(monthly, aes(month, count)) +
geom_col(fill = "steelblue", alpha = 0.7) +
labs(title = paste("Contracts priced per month -", label_txt),
x = "Month", y = "Count") + my_theme
print(p2b)
## Warning: Removed 10 rows containing missing values (`position_stack()`).

monthly <- monthly %>%
mutate(
roll3 = rollmean(mean_margin, k = 3, fill = NA, align = "center"),
roll6 = rollmean(mean_margin, k = 6, fill = NA, align = "center")
)
p3 <- ggplot(monthly, aes(month)) +
geom_line(aes(y = mean_margin, color = "Monthly mean"), alpha = 0.4) +
geom_line(aes(y = roll3, color = "3-mo rolling")) +
geom_line(aes(y = roll6, color = "6-mo rolling")) +
scale_color_manual(values = c(`Monthly mean` = "gray50",
`3-mo rolling` = "steelblue",
`6-mo rolling` = "firebrick")) +
labs(title = paste("Rolling mean -", label_txt),
x = "Month", y = "Margin (p/kWh)", color = "") +
my_theme
print(p3)
## Warning: Removed 16 rows containing missing values (`geom_line()`).
## Warning: Removed 19 rows containing missing values (`geom_line()`).

pc1 <- pc1 %>% mutate(Year = year(DateCosted), Month = month(DateCosted))
p4 <- ggplot(pc1, aes(factor(Month), Margin)) +
geom_boxplot(fill = "lightblue", outlier.size = 0.8) +
labs(title = paste("Margin by month of year -", label_txt),
x = "Month", y = "Margin (p/kWh)") + my_theme
print(p4)

yoy <- pc1 %>%
group_by(Year, Month) %>%
summarise(mean_margin = mean(Margin), .groups = "drop") %>%
mutate(Year = factor(Year))
p4b <- ggplot(yoy, aes(Month, mean_margin, color = Year, group = Year)) +
geom_line() + geom_point() +
scale_x_continuous(breaks = 1:12) +
labs(title = paste("YoY overlay of monthly mean margin -", label_txt),
x = "Month", y = "Margin (p/kWh)") + my_theme
print(p4b)

ts_vals <- monthly$mean_margin
if (any(is.na(ts_vals))) ts_vals <- zoo::na.approx(ts_vals, na.rm = FALSE)
ts_vals[is.na(ts_vals)] <- mean(ts_vals, na.rm = TRUE) # edge fill
start_yr <- year(min(monthly$month))
start_mo <- month(min(monthly$month))
margin_ts <- ts(ts_vals, frequency = 12, start = c(start_yr, start_mo))
if (length(margin_ts) >= 24) {
stl_fit <- stl(margin_ts, t.window = 13, s.window = "periodic", robust = TRUE)
plot(stl_fit, main = paste("STL decomposition -", label_txt))
} else {
cat("STL decomposition skipped - need >=24 months.\n")
}

# ==== SECTION 12 — ADF Stationarity Test ==========================
cat("\n=================================================================\n")
##
## =================================================================
cat("Stationarity & autocorrelation -", label_txt, "\n")
## Stationarity & autocorrelation - NHH - Profile Class 1
cat("=================================================================\n")
## =================================================================
adf_res <- tseries::adf.test(margin_ts, alternative = "stationary")
cat(sprintf("ADF statistic : %.4f\n", unname(adf_res$statistic)))
## ADF statistic : -3.7823
cat(sprintf("ADF p-value : %.4f --> %s\n",
adf_res$p.value,
ifelse(adf_res$p.value < 0.05, "stationary",
"NON-stationary (trend / unit root likely)")))
## ADF p-value : 0.0258 --> stationary
par(mfrow = c(1, 2))
acf (margin_ts, lag.max = 24, main = paste("ACF -", label_txt))
pacf(margin_ts, lag.max = 24, main = paste("PACF -", label_txt))

par(mfrow = c(1, 1))
monthly_lm <- monthly %>% filter(!is.na(mean_margin)) %>%
mutate(t = row_number())
fit <- lm(mean_margin ~ t, data = monthly_lm)
coef_fit <- coef(fit)
p_val <- summary(fit)$coefficients["t", "Pr(>|t|)"]
r2 <- summary(fit)$r.squared
cat("\nOLS on monthly mean margin:\n")
##
## OLS on monthly mean margin:
cat(sprintf(" slope : %+.5f p/kWh per month (~%+.4f p/kWh per year)\n",
coef_fit["t"], coef_fit["t"] * 12))
## slope : -0.00038 p/kWh per month (~-0.0046 p/kWh per year)
cat(sprintf(" R-squared : %.3f\n", r2))
## R-squared : 0.002
cat(sprintf(" p-value : %.4f --> %s\n", p_val,
ifelse(p_val < 0.05, "significant trend",
"no significant linear trend")))
## p-value : 0.7717 --> no significant linear trend
p5 <- ggplot(monthly_lm, aes(month, mean_margin)) +
geom_line(alpha = 0.6) + geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "firebrick", linetype = "dashed") +
labs(title = sprintf("Linear trend fit - %s (slope=%+.4f, p=%.3f)",
label_txt, coef_fit["t"], p_val),
x = "Month", y = "Margin (p/kWh)") + my_theme
print(p5)
## `geom_smooth()` using formula = 'y ~ x'

pc1_seg <- pc1 %>%
mutate(Term_bucket = cut(Term, breaks = c(0, 12, 24, 36, 120),
labels = c("<=12m", "13-24m", "25-36m", ">36m")))
p6 <- ggplot(pc1_seg, aes(Flexi, Margin)) +
geom_boxplot(fill = "lightblue") +
labs(title = "Margin by Flexi/Fixed", x = "", y = "Margin (p/kWh)") + my_theme
p7 <- ggplot(pc1_seg, aes(Route, Margin)) +
geom_boxplot(fill = "lightgreen") +
labs(title = "Margin by Route", x = "", y = "Margin (p/kWh)") + my_theme
p8 <- ggplot(pc1_seg %>% filter(!is.na(Term_bucket)), aes(Term_bucket, Margin)) +
geom_boxplot(fill = "wheat") +
labs(title = "Margin by Contract Term", x = "", y = "Margin (p/kWh)") + my_theme
if (requireNamespace("patchwork", quietly = TRUE)) {
library(patchwork)
print(p6 + p7 + p8)
} else {
print(p6)
print(p7)
print(p8)
}



cat("\nMean margin by segment:\n\nFlexi:\n")
##
## Mean margin by segment:
##
## Flexi:
print(pc1_seg %>% group_by(Flexi) %>%
summarise(mean = round(mean(Margin), 4), n = n()))
## # A tibble: 1 × 3
## Flexi mean n
## <chr> <dbl> <int>
## 1 Fixed 0.233 306
cat("\nRoute:\n")
##
## Route:
print(pc1_seg %>% group_by(Route) %>%
summarise(mean = round(mean(Margin), 4), n = n()))
## # A tibble: 2 × 3
## Route mean n
## <chr> <dbl> <int>
## 1 DIRECT 0.346 22
## 2 TPI 0.224 284
cat("\nTerm bucket:\n")
##
## Term bucket:
print(pc1_seg %>% filter(!is.na(Term_bucket)) %>%
group_by(Term_bucket) %>%
summarise(mean = round(mean(Margin), 4), n = n()))
## # A tibble: 4 × 3
## Term_bucket mean n
## <fct> <dbl> <int>
## 1 <=12m 0.221 107
## 2 13-24m 0.266 102
## 3 25-36m 0.196 82
## 4 >36m 0.295 15
cat("\nDone. All plots rendered to the RStudio Plots pane — use the\n")
##
## Done. All plots rendered to the RStudio Plots pane — use the
cat("arrows at the top of the Plots pane to flick through them.\n")
## arrows at the top of the Plots pane to flick through them.