Estimate of Renewable Energy %age in Noosa LGA to end 2026

Cloned from forecast.R

This version re4moves all old test code

To see test code - go to forecast_old.R

To do:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(dplyr)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
locality_list <- c('NOOSA SHIRE', '4563', '4565', '4566', '4567', '4568', '4569', '4571')

Define forecast function

# ============= function =============
forecast_ahead <- function(my_df, forecast_date, forecast_var, omitted_months) {
  
  start_date <- min(my_df$year_month)
  last_date <- max(my_df$year_month)
  # conditional code - omitted months param is optional
  if(missing(omitted_months)) {
    model_last_date <- last_date
    print("use last date")
  } else {
    model_last_date <- last_date  %m+% days(1) %m-% months(omitted_months)
    # filter out omitted months of input data
    my_df <- my_df %>% filter(year_month < ymd(model_last_date))
    print("cater to omitted months")
  }
  print("Model_last_date")
  print(model_last_date)
  forecast_intervals <- interval(model_last_date,forecast_date) %/% months(1)
  # transform data to time series, using extracted start date, just using the column of interest

  mts <- ts(my_df[forecast_var], frequency=12, start=c(year(start_date),month(start_date)))
  # forecasting model using arima model
  fmodel <- auto.arima(mts)
  # forecast ahead
  fts <- forecast(fmodel, forecast_intervals) 
  # make it a dataframe for easier manipulation
  df_fts <- data.frame(fts)
  # make the row name into a column
  df_fts <- cbind(rownames(df_fts), data.frame(df_fts, row.names=NULL))
  # give better names
  # NOTE ASSUMPTION THAT TIME SERIES ALWAYS IN MONTHS and NAME IS year_month
  df_fts <- rename(df_fts, year_month = 'rownames(df_fts)')
  # https://community.rstudio.com/t/pass-a-variable-to-dplyr-rename-to-change-columnname/6907
  # df %>% rename(!!variable := name_of_col_from_df)
  df_fts <- rename(df_fts, !!forecast_var := 'Point.Forecast')
  # Convert to a date (last day of month) to match other data  
  df_fts$year_month <- paste0("01 ", df_fts$year_month)
  df_fts$year_month <- as.Date(df_fts$year_month, "%d %b %Y") %m+% months(1) %m-% days(1)
return(df_fts)  
}
# =================== E N D    F U N C T I O N ================

Get solar export forecast

# read data for SOLAR EXPORT and TOTAL CONSUMPTION forecast
rdsfilename = "EQld_consumption_Noosa_LGA.rds"
Noosa_LGA_consump <- read_rds(rdsfilename) 
# ============= N O T E ========
# Energex data contains N/A values for future months
# so need to remove these from Noosa_LGA_consump
# best to do this where rds file is created
# but do it here for now
Noosa_LGA_consump <- Noosa_LGA_consump %>% drop_na()

# ========= forecast SOLAR EXPORT for all localities ==============
# create empty data frame for solar export forecast
solar_export_forecast_df <- data.frame(
          year_month = as.Date(character()),
          total_kWh = numeric() )
for (mylocality in locality_list) {
  my_LGA <- Noosa_LGA_consump[Noosa_LGA_consump$locality==mylocality,]
  my_LGA <- my_LGA %>% arrange(year_month)

  df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'solar_kWh')
  # discard columns we don't really need
  df_fts_trim <- select(df_fts, year_month, solar_kWh)
  # add a locality column back in
  df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
  # and bind it to other localities
  solar_export_forecast_df <- rbind(solar_export_forecast_df,df_fts_trim)
}
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
# check if file exists, and if so delete
rds_filename = "solar_export_forecast.rds"
if(file.exists(rds_filename)) {
  file.remove(rds_filename)
}
## [1] TRUE
saveRDS(solar_export_forecast_df,file = rds_filename)

Get total consumption forecast

# ========= forecast TOTAL CONSUMPTION for all localities ==============
# create empty data frame for solar export forecast
consumption_forecast_df <- data.frame(
  year_month = as.Date(character()),
  total_kWh = numeric() )
for (mylocality in locality_list) {
  my_LGA <- Noosa_LGA_consump[Noosa_LGA_consump$locality==mylocality,]
  my_LGA <- my_LGA %>% arrange(year_month)
  
  df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'total_kWh')
  # discard columns we don't really need
  df_fts_trim <- select(df_fts, year_month, total_kWh)
  # add a locality column back in
  df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
  # and bind it to other locailities
  consumption_forecast_df <- rbind(consumption_forecast_df,df_fts_trim)
}
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
# check if file exists, and if so delete
rds_filename = "consumption_forecast.rds"
if(file.exists(rds_filename)) {
  file.remove(rds_filename)
}
## [1] TRUE
saveRDS(consumption_forecast_df,file = rds_filename)

Forecast solar PV kW

# ========= forecast Total_PV_kW for all localities ==============
# Read data for SOLAR PV forecast
Noosa_postcode_data <- readRDS("Noosa_LGA_Postcode_CER_totals.rds")
# create empty data frame for solar export forecast
Total_PV_kW_forecast_df <- data.frame(
  year_month = as.Date(character()),
  Total_PV_kW = numeric() )
for (mylocality in locality_list) {
  my_LGA <- Noosa_postcode_data[Noosa_postcode_data$Postcode==mylocality,]
  my_LGA <- my_LGA %>% arrange(year_month)
# how many months to mit from forecast to allow CER data to stabilise....
  omitted_months <- 6
  df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'Total_PV_kW', omitted_months)
  # discard columns we don't really need
  df_fts_trim <- select(df_fts, year_month, Total_PV_kW)
  # add a locality column back in
  df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
  # and bind it to other locailities
  Total_PV_kW_forecast_df <- rbind(Total_PV_kW_forecast_df,df_fts_trim)
}
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
# check if file exists, and if so delete
rds_filename = "Total_PV_kW_forecast.rds"
if(file.exists(rds_filename)) {
  file.remove(rds_filename)
}
## [1] TRUE
saveRDS(Total_PV_kW_forecast_df,file = rds_filename)

# ================ E N D   O F   F O R E C A S T I N G ============

Forecasting done

Now apply monthly generation profile to get generation kWh

# at this point we have forecasts for all localities - consumption, export + solar_kW
# ==========================================
# Now estimate solar production
# get actual + forecast solar PV
# apply PVWatts: Monthly PV Performance Data
# but de-rate to 1kW -> 1.2 MWh annual
# PV Watts gives 1.67
# ========================
# First get Total_PV_kW - actual and forecast
# actual comes from my_postcode
Noosa_postcode_data <- readRDS("Noosa_LGA_Postcode_CER_totals.rds")
# better to rename "Postcode" to "locality" to match other datasets
Noosa_postcode_data <- rename(Noosa_postcode_data, locality = Postcode)
# now get forecast
rdsfilename = "Total_PV_kW_forecast.rds"
Total_PV_kW_forecast_df <- read_rds(rdsfilename)
# so my_postcode + Total_PV_kW_forecast_df have data for all localities....
# so can iterate over all localities.....

# create empty data frame for Total_PV_kW_df
Total_PV_kW_df <- data.frame(
  year_month = as.Date(character()),
  Total_PV_kW = numeric() )
for (mylocality in locality_list) {
  df_fts <- Total_PV_kW_forecast_df %>% filter(locality == mylocality)
  my_postcode <- Noosa_postcode_data %>% filter(locality == mylocality)
  # need to trim off overlap in forecast
  # so get "last_date" for actuals  
  last_date <- max(my_postcode$year_month)
  # now trim forecast based on last_date
  df_fts_less <- df_fts %>%
    filter(year_month > ymd(last_date))
  # just get columns from my_postcode that match forecast
  my_postcode <- select(my_postcode, locality, year_month, Total_PV_kW)
  # now bind to get all in one dataset for this locality
  my_Total_PV_kW_df <- rbind(my_postcode,df_fts_less)
  # and combine with the total for all localities
  Total_PV_kW_df <- rbind(Total_PV_kW_df,my_Total_PV_kW_df)
}
# =============> now apply monthly generation factor to each month.....
# this can be applied once on the dataframe containing all localities
kWh_factor <- c(
109.821,
102.139,
102.746,
88.205,
88.160,
77.755,
91.697,
102.336,
108.342,
110.870,
111.866,
106.062 )
Total_PV_kWh_df <- Total_PV_kW_df %>% mutate(Total_PV_kWh = Total_PV_kW * kWh_factor[month(year_month)])
# remove the kW column
Total_PV_kWh_df <- Total_PV_kWh_df %>% select(- Total_PV_kW)
# =========== cool to here - all localities =========

Now put all values in a single dataframe and calculate new columns

locality

year_month

A. Metered Energex

B. Exported Energex

C. Generated CER x generation curve

D. Self_Consume C - B

E. Total_Consume A + D

F. Grid A - B

G. Grid_RE some factor of F - maybe get from AEMO fuel mix (outside daytime)

H. Percent RE 100 x ( C + G ) / E

# ====== put all data in a
# single data frame, with observations & calculated values in separate columns
# then use ggplot as per following suggestion
# https://stackoverflow.com/questions/71844183/plotting-multiple-columns-in-r-ggplot-and-have-the-legend-right

# ==========================
# so we should end up with data frame with columns - that cover actual + forecast
# should all be trimmed to have the same start date (ie CER data)
# locality
# year_month
# A. Metered       Energex                
# B. Exported      Energex
# C. Generated     CER x generation curve
# D. Self_Consume  C - B
# E. Total_Consume A + D
# F. Grid          A - B
# G. Grid_RE       some factor of F - maybe get from AEMO fuel mix (outside daytime)
# H. Percent RE    100 x ( C + G ) / E
# =========== Following section gets metered_df + exported_df =======
rdsfilename = "EQld_consumption_Noosa_LGA.rds"
Noosa_LGA_consump <- read_rds(rdsfilename) # %>% filter(locality == mylocality)
# get rid of na
Noosa_LGA_consump <- Noosa_LGA_consump %>% drop_na()
rdsfilename = "consumption_forecast.rds"
consumption_forecast_df <- read_rds(rdsfilename)
# get actual + forecast CONSUMPTION
my_LGA <- Noosa_LGA_consump %>% 
    select(locality, year_month, total_kWh)
df_fts <- consumption_forecast_df 
# This is "A"
metered_df <- rbind(my_LGA, df_fts) %>% drop_na()
# get actual + forecast SOLAR EXPORT
rdsfilename = "solar_export_forecast.rds"
solar_export_forecast_df <- read_rds(rdsfilename)

my_LGA <- Noosa_LGA_consump %>% 
  select(locality, year_month, solar_kWh)
df_fts <- solar_export_forecast_df 
# This is "B"
exported_df <- rbind(my_LGA, df_fts) %>% drop_na()
# ============= Combine all actuals + forecast into one dataset =========
# join metered + exported
df1 <-left_join(metered_df,exported_df, by=c('locality'='locality', 'year_month'='year_month'))
# and join generated
df2 <-left_join(df1, Total_PV_kWh_df, by=c('locality'='locality', 'year_month'='year_month'))

# Note - RE_fuel_mix_pc should have ramping 
RE_fuel_mix_pc = 10
df3 <- 
  df2 %>%
  mutate(metered = total_kWh/1000000,
         exported = solar_kWh/1000000,
         generated = Total_PV_kWh/1000000) %>%
  mutate(self_consume = generated - exported,
         grid = metered - exported) %>%
  mutate(total_consume = metered + self_consume) %>%
  mutate(grid_RE = RE_fuel_mix_pc * grid / 100) %>%
  mutate(pc_RE = 100 *  (generated + grid_RE) / total_consume)
# This looks good so save it
# check if file exists, and if so delete
rds_filename = "Noosa RE pc actual and projected.rds"
if(file.exists(rds_filename)) {
  file.remove(rds_filename)
}
## [1] TRUE
saveRDS(df3,file = rds_filename)

# Now plot %RE for all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
p3 <- ggplot(df3, #%>% filter(locality == mylocality)
              aes(year_month)) +       # Create ggplot2 plot
  geom_line(aes(y = pc_RE)) +
  labs(title = "Actual/Projected % RE",
    y = "% RE", x= "year")  +
  facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
df3 %>%                               # Summary by group using dplyr
  group_by(locality) %>% 
  summarize(max_pc_RE = max(pc_RE))
## # A tibble: 9 × 2
##   locality    max_pc_RE
##   <chr>           <dbl>
## 1 4563             58.5
## 2 4565             58.4
## 3 4566             41.2
## 4 4567             36.5
## 5 4568             56.2
## 6 4569             63.1
## 7 4571             58.3
## 8 4573             NA  
## 9 NOOSA SHIRE      47.1
# Now plot SOLAR EXPORT all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
              aes(year_month)) +       # Create ggplot2 plot
  geom_line(aes(y = exported)) +
  labs(title = paste("Actual/Projected Solar Export",mylocality),
    y = "solar export (GWh/month)", x= "year") # +
#  facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
# Now plot GENERATED all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
              aes(year_month)) +       # Create ggplot2 plot
  geom_line(aes(y = generated)) +
  labs(title = paste("Actual/Projected Solar Generated",mylocality),
    y = "solar generated (GWh/month)", x= "year") # +
#  facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
# Now plot METERED all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
              aes(year_month)) +       # Create ggplot2 plot
  geom_line(aes(y = metered)) +
  labs(title = paste("Actual/Projected - Metered Consumption",mylocality),
    y = "metered consumption (GWh/month)", x= "year") # +
#  facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")