setwd("D:/")
getwd()
## [1] "D:/"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(lubridate)
file_date  <- format(Sys.Date(), "%Y%m%d")
file_name  <- "SSEEX1.xlsx"
sheet_name <- "Sheet1"

FY_START   <- as.Date("01/04/2017","%d/%m/%Y")
FY_END     <- as.Date("31/03/2018","%d/%m/%Y")
DAYS_YR    <- 365

MARKET_F   <- "NHH"
PROFILE_F  <- 1
raw <- read_excel(file_name, sheet = sheet_name)
str(raw)
## tibble [1,999 × 16] (S3: tbl_df/tbl/data.frame)
##  $ CustID            : chr [1:1999] "Unique ID to identify a customer" "274762" "323251" "20909100" ...
##  $ Site ID           : chr [1:1999] "Unique ID to identify a meter" "759212" "1067808" "459737" ...
##  $ Postcode          : chr [1:1999] "Region" "BA" "BH" "GL" ...
##  $ Profile           : chr [1:1999] "Profile Class" "3" "3" "3" ...
##  $ Market            : chr [1:1999] "NHH or HH" "NHH" "NHH" "NHH" ...
##  $ Flexi             : chr [1:1999] "Fixed or flexi contract type" "Fixed" "Fixed" "Fixed" ...
##  $ Annual Consumption: chr [1:1999] "Forecasted annual consumption (kWh)" "245" "23340" "3725" ...
##  $ Term              : chr [1:1999] "Contract length in months" "36" "9" "12" ...
##  $ Margin            : chr [1:1999] "Margin p/kWh" "0.23425000000000001" "0.91762999999999995" "0.8" ...
##  $ Route             : chr [1:1999] "Whether the customer came to us directly or through a TPI (broker)" "TPI" "TPI" "TPI" ...
##  $ Consultant        : chr [1:1999] "Unique identifier to distinuguish between TPIs" "48936" "20168" "10031706" ...
##  $ End_Date          : chr [1:1999] "Contract end date" "43646" "43373" "43404" ...
##  $ Start_Date        : chr [1:1999] "Contract start date" "42552" "43101" "43040" ...
##  $ DateCosted        : chr [1:1999] "Date the contract was priced" "42530" "43091" "42996" ...
##  $ STRUCTURE         : chr [1:1999] "Code to identify type of contract" "Q1" "Q1" "Q1" ...
##  $ BILLF             : chr [1:1999] "Frequency of bill" "Q" "Q" "Q" ...
glimpse(raw) # all are char datatype needs to be changed
## Rows: 1,999
## Columns: 16
## $ CustID               <chr> "Unique ID to identify a customer", "274762", "32…
## $ `Site ID`            <chr> "Unique ID to identify a meter", "759212", "10678…
## $ Postcode             <chr> "Region", "BA", "BH", "GL", "PO", "KW", "NW", "NW…
## $ Profile              <chr> "Profile Class", "3", "3", "3", "3", "4", "3", "3…
## $ Market               <chr> "NHH or HH", "NHH", "NHH", "NHH", "NHH", "NHH", "…
## $ Flexi                <chr> "Fixed or flexi contract type", "Fixed", "Fixed",…
## $ `Annual Consumption` <chr> "Forecasted annual consumption (kWh)", "245", "23…
## $ Term                 <chr> "Contract length in months", "36", "9", "12", "12…
## $ Margin               <chr> "Margin p/kWh", "0.23425000000000001", "0.9176299…
## $ Route                <chr> "Whether the customer came to us directly or thro…
## $ Consultant           <chr> "Unique identifier to distinuguish between TPIs",…
## $ End_Date             <chr> "Contract end date", "43646", "43373", "43404", "…
## $ Start_Date           <chr> "Contract start date", "42552", "43101", "43040",…
## $ DateCosted           <chr> "Date the contract was priced", "42530", "43091",…
## $ STRUCTURE            <chr> "Code to identify type of contract", "Q1", "Q1", …
## $ BILLF                <chr> "Frequency of bill", "Q", "Q", "Q", "Q", "M", "M"…
names(raw) <- trimws(names(raw))

rdb <- raw %>%
  slice(-1) %>%                                      # drop description row
  mutate(
    Profile            = as.numeric(Profile),
    `Annual Consumption` = as.numeric(`Annual Consumption`),
    Margin             = as.numeric(Margin),
    Term               = as.numeric(Term),
    Start_Date = as.Date(as.numeric(Start_Date), origin = "1899-12-30"),
    End_Date   = as.Date(as.numeric(End_Date),   origin = "1899-12-30"),
    DateCosted = as.Date(as.numeric( DateCosted), origin = "1899-12-30"),
    Market             = toupper(trimws(as.character(Market)))
  )

cat("Rows loaded:", nrow(rdb), "\n")
## Rows loaded: 1998
fy_overlap_days <- function(start, end, fy_start, fy_end) {
  # inclusive overlap day-count; 0 if no overlap or NA dates
  if (is.na(start) || is.na(end)) return(0L)
  win_start <- pmax(start, fy_start)
  win_end   <- pmin(end,   fy_end)
  if (win_end < win_start) return(0L)
  as.integer(as.numeric(win_end - win_start) + 1)
}

pc1_fy <- rdb %>%
  filter(Market == MARKET_F, Profile == PROFILE_F) %>%
  rowwise() %>%
  mutate(
    Daily_Consumption = `Annual Consumption` / DAYS_YR,
    FY_Days           = fy_overlap_days(Start_Date, End_Date, FY_START, FY_END),
    FY_Consumption    = Daily_Consumption * FY_Days
  ) %>%
  ungroup()
n_rows        <- nrow(pc1_fy)
n_in_fy       <- sum(pc1_fy$FY_Days > 0)
total_fy_days <- sum(pc1_fy$FY_Days)
total_kwh     <- sum(pc1_fy$FY_Consumption, na.rm = TRUE)

cat("\n=================================================================\n")
## 
## =================================================================
cat("Q1 : NHH, Profile Class 1, FY 2017/18 (01-Apr-2017 -> 31-Mar-2018)\n")
## Q1 : NHH, Profile Class 1, FY 2017/18 (01-Apr-2017 -> 31-Mar-2018)
cat("=================================================================\n")
## =================================================================
cat(sprintf("Matching contracts (NHH, PC1)       : %d\n", n_rows))
## Matching contracts (NHH, PC1)       : 306
cat(sprintf("Matching contracts (NHH, PC1)       : %d\n", n_rows))
## Matching contracts (NHH, PC1)       : 306
cat(sprintf("Contracts that overlap FY17/18      : %d\n", n_in_fy))
## Contracts that overlap FY17/18      : 285
cat(sprintf("Total contract-days within FY17/18  : %s\n", format(total_fy_days, big.mark = ",")))
## Total contract-days within FY17/18  : 70,239
cat(sprintf("TOTAL FY17/18 CONSUMPTION (kWh)     : %s\n", format(round(total_kwh, 2), big.mark = ",")))
## TOTAL FY17/18 CONSUMPTION (kWh)     : 1,120,143
cat(sprintf("                          (MWh)     : %s\n", format(round(total_kwh/1e3,  2), big.mark = ",")))
##                           (MWh)     : 1,120.14
cat(sprintf("                          (GWh)     : %s\n", format(round(total_kwh/1e6,  4), big.mark = ",")))
##                           (GWh)     : 1.1201
cat("\nSample rows (first 8):\n")
## 
## Sample rows (first 8):
print(pc1_fy %>%
        select(CustID, Start_Date, End_Date, `Annual Consumption`,
               Daily_Consumption, FY_Days, FY_Consumption) %>%
        head(8))
## # A tibble: 8 × 7
##   CustID   Start_Date End_Date   `Annual Consumption` Daily_Consumption FY_Days
##   <chr>    <date>     <date>                    <dbl>             <dbl>   <int>
## 1 21108735 2017-05-24 2019-09-30                12822             35.1      312
## 2 322235   2016-07-01 2017-06-30                 5000             13.7       91
## 3 10048165 2014-05-01 2017-04-30                  721              1.98      30
## 4 265759   2018-01-18 2018-07-31                 9868             27.0       73
## 5 290969   2015-12-01 2017-11-30                20004             54.8      244
## 6 290270   2017-04-01 2020-03-31                40056            110.       365
## 7 366334   2017-06-01 2017-08-31                  785              2.15      92
## 8 459410   2017-02-01 2019-01-31                  704              1.93     365
## # ℹ 1 more variable: FY_Consumption <dbl>
out_file <- paste0("q1_fy1718_pc1_Consumtion_", file_date, ".csv")
pc1_fy %>%
  select(CustID, Start_Date, End_Date, `Annual Consumption`,
         Daily_Consumption, FY_Days, FY_Consumption) %>%
  write_csv(out_file)
View(pc1_fy)
cat(sprintf("\nPer-row breakdown written to: %s\n", out_file))
## 
## Per-row breakdown written to: q1_fy1718_pc1_Consumtion_20260416.csv