setwd("D:/")
getwd()
## [1] "D:/"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(lubridate)
file_date <- format(Sys.Date(), "%Y%m%d")
file_name <- "SSEEX1.xlsx"
sheet_name <- "Sheet1"
FY_START <- as.Date("01/04/2017","%d/%m/%Y")
FY_END <- as.Date("31/03/2018","%d/%m/%Y")
DAYS_YR <- 365
MARKET_F <- "NHH"
PROFILE_F <- 1
raw <- read_excel(file_name, sheet = sheet_name)
str(raw)
## tibble [1,999 × 16] (S3: tbl_df/tbl/data.frame)
## $ CustID : chr [1:1999] "Unique ID to identify a customer" "274762" "323251" "20909100" ...
## $ Site ID : chr [1:1999] "Unique ID to identify a meter" "759212" "1067808" "459737" ...
## $ Postcode : chr [1:1999] "Region" "BA" "BH" "GL" ...
## $ Profile : chr [1:1999] "Profile Class" "3" "3" "3" ...
## $ Market : chr [1:1999] "NHH or HH" "NHH" "NHH" "NHH" ...
## $ Flexi : chr [1:1999] "Fixed or flexi contract type" "Fixed" "Fixed" "Fixed" ...
## $ Annual Consumption: chr [1:1999] "Forecasted annual consumption (kWh)" "245" "23340" "3725" ...
## $ Term : chr [1:1999] "Contract length in months" "36" "9" "12" ...
## $ Margin : chr [1:1999] "Margin p/kWh" "0.23425000000000001" "0.91762999999999995" "0.8" ...
## $ Route : chr [1:1999] "Whether the customer came to us directly or through a TPI (broker)" "TPI" "TPI" "TPI" ...
## $ Consultant : chr [1:1999] "Unique identifier to distinuguish between TPIs" "48936" "20168" "10031706" ...
## $ End_Date : chr [1:1999] "Contract end date" "43646" "43373" "43404" ...
## $ Start_Date : chr [1:1999] "Contract start date" "42552" "43101" "43040" ...
## $ DateCosted : chr [1:1999] "Date the contract was priced" "42530" "43091" "42996" ...
## $ STRUCTURE : chr [1:1999] "Code to identify type of contract" "Q1" "Q1" "Q1" ...
## $ BILLF : chr [1:1999] "Frequency of bill" "Q" "Q" "Q" ...
glimpse(raw) # all are char datatype needs to be changed
## Rows: 1,999
## Columns: 16
## $ CustID <chr> "Unique ID to identify a customer", "274762", "32…
## $ `Site ID` <chr> "Unique ID to identify a meter", "759212", "10678…
## $ Postcode <chr> "Region", "BA", "BH", "GL", "PO", "KW", "NW", "NW…
## $ Profile <chr> "Profile Class", "3", "3", "3", "3", "4", "3", "3…
## $ Market <chr> "NHH or HH", "NHH", "NHH", "NHH", "NHH", "NHH", "…
## $ Flexi <chr> "Fixed or flexi contract type", "Fixed", "Fixed",…
## $ `Annual Consumption` <chr> "Forecasted annual consumption (kWh)", "245", "23…
## $ Term <chr> "Contract length in months", "36", "9", "12", "12…
## $ Margin <chr> "Margin p/kWh", "0.23425000000000001", "0.9176299…
## $ Route <chr> "Whether the customer came to us directly or thro…
## $ Consultant <chr> "Unique identifier to distinuguish between TPIs",…
## $ End_Date <chr> "Contract end date", "43646", "43373", "43404", "…
## $ Start_Date <chr> "Contract start date", "42552", "43101", "43040",…
## $ DateCosted <chr> "Date the contract was priced", "42530", "43091",…
## $ STRUCTURE <chr> "Code to identify type of contract", "Q1", "Q1", …
## $ BILLF <chr> "Frequency of bill", "Q", "Q", "Q", "Q", "M", "M"…
names(raw) <- trimws(names(raw))
rdb <- raw %>%
slice(-1) %>% # drop description row
mutate(
Profile = as.numeric(Profile),
`Annual Consumption` = as.numeric(`Annual Consumption`),
Margin = as.numeric(Margin),
Term = as.numeric(Term),
Start_Date = as.Date(as.numeric(Start_Date), origin = "1899-12-30"),
End_Date = as.Date(as.numeric(End_Date), origin = "1899-12-30"),
DateCosted = as.Date(as.numeric( DateCosted), origin = "1899-12-30"),
Market = toupper(trimws(as.character(Market)))
)
cat("Rows loaded:", nrow(rdb), "\n")
## Rows loaded: 1998
fy_overlap_days <- function(start, end, fy_start, fy_end) {
# inclusive overlap day-count; 0 if no overlap or NA dates
if (is.na(start) || is.na(end)) return(0L)
win_start <- pmax(start, fy_start)
win_end <- pmin(end, fy_end)
if (win_end < win_start) return(0L)
as.integer(as.numeric(win_end - win_start) + 1)
}
pc1_fy <- rdb %>%
filter(Market == MARKET_F, Profile == PROFILE_F) %>%
rowwise() %>%
mutate(
Daily_Consumption = `Annual Consumption` / DAYS_YR,
FY_Days = fy_overlap_days(Start_Date, End_Date, FY_START, FY_END),
FY_Consumption = Daily_Consumption * FY_Days
) %>%
ungroup()
n_rows <- nrow(pc1_fy)
n_in_fy <- sum(pc1_fy$FY_Days > 0)
total_fy_days <- sum(pc1_fy$FY_Days)
total_kwh <- sum(pc1_fy$FY_Consumption, na.rm = TRUE)
cat("\n=================================================================\n")
##
## =================================================================
cat("Q1 : NHH, Profile Class 1, FY 2017/18 (01-Apr-2017 -> 31-Mar-2018)\n")
## Q1 : NHH, Profile Class 1, FY 2017/18 (01-Apr-2017 -> 31-Mar-2018)
cat("=================================================================\n")
## =================================================================
cat(sprintf("Matching contracts (NHH, PC1) : %d\n", n_rows))
## Matching contracts (NHH, PC1) : 306
cat(sprintf("Matching contracts (NHH, PC1) : %d\n", n_rows))
## Matching contracts (NHH, PC1) : 306
cat(sprintf("Contracts that overlap FY17/18 : %d\n", n_in_fy))
## Contracts that overlap FY17/18 : 285
cat(sprintf("Total contract-days within FY17/18 : %s\n", format(total_fy_days, big.mark = ",")))
## Total contract-days within FY17/18 : 70,239
cat(sprintf("TOTAL FY17/18 CONSUMPTION (kWh) : %s\n", format(round(total_kwh, 2), big.mark = ",")))
## TOTAL FY17/18 CONSUMPTION (kWh) : 1,120,143
cat(sprintf(" (MWh) : %s\n", format(round(total_kwh/1e3, 2), big.mark = ",")))
## (MWh) : 1,120.14
cat(sprintf(" (GWh) : %s\n", format(round(total_kwh/1e6, 4), big.mark = ",")))
## (GWh) : 1.1201
cat("\nSample rows (first 8):\n")
##
## Sample rows (first 8):
print(pc1_fy %>%
select(CustID, Start_Date, End_Date, `Annual Consumption`,
Daily_Consumption, FY_Days, FY_Consumption) %>%
head(8))
## # A tibble: 8 × 7
## CustID Start_Date End_Date `Annual Consumption` Daily_Consumption FY_Days
## <chr> <date> <date> <dbl> <dbl> <int>
## 1 21108735 2017-05-24 2019-09-30 12822 35.1 312
## 2 322235 2016-07-01 2017-06-30 5000 13.7 91
## 3 10048165 2014-05-01 2017-04-30 721 1.98 30
## 4 265759 2018-01-18 2018-07-31 9868 27.0 73
## 5 290969 2015-12-01 2017-11-30 20004 54.8 244
## 6 290270 2017-04-01 2020-03-31 40056 110. 365
## 7 366334 2017-06-01 2017-08-31 785 2.15 92
## 8 459410 2017-02-01 2019-01-31 704 1.93 365
## # ℹ 1 more variable: FY_Consumption <dbl>
out_file <- paste0("q1_fy1718_pc1_Consumtion_", file_date, ".csv")
pc1_fy %>%
select(CustID, Start_Date, End_Date, `Annual Consumption`,
Daily_Consumption, FY_Days, FY_Consumption) %>%
write_csv(out_file)
View(pc1_fy)
cat(sprintf("\nPer-row breakdown written to: %s\n", out_file))
##
## Per-row breakdown written to: q1_fy1718_pc1_Consumtion_20260416.csv