Cloned from forecast.R
This version re4moves all old test code
To see test code - go to forecast_old.R
To do:
Make this an RMarkdown - done (this doc)
ramping factor for % RE from grid over time, ie %RE excluding rooftop contribution
can use
https://www.energy.gov.au/publications/australian-energy-statistics-table-o-electricity-generation-fuel-type-2020-21-and-2021
Useful for %RE and contribution of rooftop PV
Plus assumptions re RE for Qld grid to 2030, ie 50%, and Fed's 82%
and assumptions about how much woukd be for non-rooftop contributionslimit how much solar export gets added to local consumption over time
nah - logic looks ok. only self-consump gets addedcalc what model produces re self-consumption %age
save final results so can be plotted, listed elsewhere
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(dplyr)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
locality_list <- c('NOOSA SHIRE', '4563', '4565', '4566', '4567', '4568', '4569', '4571')
Define forecast function
# ============= function =============
forecast_ahead <- function(my_df, forecast_date, forecast_var, omitted_months) {
start_date <- min(my_df$year_month)
last_date <- max(my_df$year_month)
# conditional code - omitted months param is optional
if(missing(omitted_months)) {
model_last_date <- last_date
print("use last date")
} else {
model_last_date <- last_date %m+% days(1) %m-% months(omitted_months)
# filter out omitted months of input data
my_df <- my_df %>% filter(year_month < ymd(model_last_date))
print("cater to omitted months")
}
print("Model_last_date")
print(model_last_date)
forecast_intervals <- interval(model_last_date,forecast_date) %/% months(1)
# transform data to time series, using extracted start date, just using the column of interest
mts <- ts(my_df[forecast_var], frequency=12, start=c(year(start_date),month(start_date)))
# forecasting model using arima model
fmodel <- auto.arima(mts)
# forecast ahead
fts <- forecast(fmodel, forecast_intervals)
# make it a dataframe for easier manipulation
df_fts <- data.frame(fts)
# make the row name into a column
df_fts <- cbind(rownames(df_fts), data.frame(df_fts, row.names=NULL))
# give better names
# NOTE ASSUMPTION THAT TIME SERIES ALWAYS IN MONTHS and NAME IS year_month
df_fts <- rename(df_fts, year_month = 'rownames(df_fts)')
# https://community.rstudio.com/t/pass-a-variable-to-dplyr-rename-to-change-columnname/6907
# df %>% rename(!!variable := name_of_col_from_df)
df_fts <- rename(df_fts, !!forecast_var := 'Point.Forecast')
# Convert to a date (last day of month) to match other data
df_fts$year_month <- paste0("01 ", df_fts$year_month)
df_fts$year_month <- as.Date(df_fts$year_month, "%d %b %Y") %m+% months(1) %m-% days(1)
return(df_fts)
}
# =================== E N D F U N C T I O N ================
Get solar export forecast
# read data for SOLAR EXPORT and TOTAL CONSUMPTION forecast
rdsfilename = "EQld_consumption_Noosa_LGA.rds"
Noosa_LGA_consump <- read_rds(rdsfilename)
# ============= N O T E ========
# Energex data contains N/A values for future months
# so need to remove these from Noosa_LGA_consump
# best to do this where rds file is created
# but do it here for now
Noosa_LGA_consump <- Noosa_LGA_consump %>% drop_na()
# ========= forecast SOLAR EXPORT for all localities ==============
# create empty data frame for solar export forecast
solar_export_forecast_df <- data.frame(
year_month = as.Date(character()),
total_kWh = numeric() )
for (mylocality in locality_list) {
my_LGA <- Noosa_LGA_consump[Noosa_LGA_consump$locality==mylocality,]
my_LGA <- my_LGA %>% arrange(year_month)
df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'solar_kWh')
# discard columns we don't really need
df_fts_trim <- select(df_fts, year_month, solar_kWh)
# add a locality column back in
df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
# and bind it to other localities
solar_export_forecast_df <- rbind(solar_export_forecast_df,df_fts_trim)
}
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
# check if file exists, and if so delete
rds_filename = "solar_export_forecast.rds"
if(file.exists(rds_filename)) {
file.remove(rds_filename)
}
## [1] TRUE
saveRDS(solar_export_forecast_df,file = rds_filename)
Get total consumption forecast
# ========= forecast TOTAL CONSUMPTION for all localities ==============
# create empty data frame for solar export forecast
consumption_forecast_df <- data.frame(
year_month = as.Date(character()),
total_kWh = numeric() )
for (mylocality in locality_list) {
my_LGA <- Noosa_LGA_consump[Noosa_LGA_consump$locality==mylocality,]
my_LGA <- my_LGA %>% arrange(year_month)
df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'total_kWh')
# discard columns we don't really need
df_fts_trim <- select(df_fts, year_month, total_kWh)
# add a locality column back in
df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
# and bind it to other locailities
consumption_forecast_df <- rbind(consumption_forecast_df,df_fts_trim)
}
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
## [1] "use last date"
## [1] "Model_last_date"
## [1] "2022-09-30"
# check if file exists, and if so delete
rds_filename = "consumption_forecast.rds"
if(file.exists(rds_filename)) {
file.remove(rds_filename)
}
## [1] TRUE
saveRDS(consumption_forecast_df,file = rds_filename)
Forecast solar PV kW
# ========= forecast Total_PV_kW for all localities ==============
# Read data for SOLAR PV forecast
Noosa_postcode_data <- readRDS("Noosa_LGA_Postcode_CER_totals.rds")
# create empty data frame for solar export forecast
Total_PV_kW_forecast_df <- data.frame(
year_month = as.Date(character()),
Total_PV_kW = numeric() )
for (mylocality in locality_list) {
my_LGA <- Noosa_postcode_data[Noosa_postcode_data$Postcode==mylocality,]
my_LGA <- my_LGA %>% arrange(year_month)
# how many months to mit from forecast to allow CER data to stabilise....
omitted_months <- 6
df_fts <- forecast_ahead(my_LGA, ymd("2027-01-01"), 'Total_PV_kW', omitted_months)
# discard columns we don't really need
df_fts_trim <- select(df_fts, year_month, Total_PV_kW)
# add a locality column back in
df_fts_trim <- add_column(df_fts_trim, locality = mylocality, .before = 1)
# and bind it to other locailities
Total_PV_kW_forecast_df <- rbind(Total_PV_kW_forecast_df,df_fts_trim)
}
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
## [1] "cater to omitted months"
## [1] "Model_last_date"
## [1] "2022-01-01"
# check if file exists, and if so delete
rds_filename = "Total_PV_kW_forecast.rds"
if(file.exists(rds_filename)) {
file.remove(rds_filename)
}
## [1] TRUE
saveRDS(Total_PV_kW_forecast_df,file = rds_filename)
# ================ E N D O F F O R E C A S T I N G ============
Forecasting done
Now apply monthly generation profile to get generation kWh
# at this point we have forecasts for all localities - consumption, export + solar_kW
# ==========================================
# Now estimate solar production
# get actual + forecast solar PV
# apply PVWatts: Monthly PV Performance Data
# but de-rate to 1kW -> 1.2 MWh annual
# PV Watts gives 1.67
# ========================
# First get Total_PV_kW - actual and forecast
# actual comes from my_postcode
Noosa_postcode_data <- readRDS("Noosa_LGA_Postcode_CER_totals.rds")
# better to rename "Postcode" to "locality" to match other datasets
Noosa_postcode_data <- rename(Noosa_postcode_data, locality = Postcode)
# now get forecast
rdsfilename = "Total_PV_kW_forecast.rds"
Total_PV_kW_forecast_df <- read_rds(rdsfilename)
# so my_postcode + Total_PV_kW_forecast_df have data for all localities....
# so can iterate over all localities.....
# create empty data frame for Total_PV_kW_df
Total_PV_kW_df <- data.frame(
year_month = as.Date(character()),
Total_PV_kW = numeric() )
for (mylocality in locality_list) {
df_fts <- Total_PV_kW_forecast_df %>% filter(locality == mylocality)
my_postcode <- Noosa_postcode_data %>% filter(locality == mylocality)
# need to trim off overlap in forecast
# so get "last_date" for actuals
last_date <- max(my_postcode$year_month)
# now trim forecast based on last_date
df_fts_less <- df_fts %>%
filter(year_month > ymd(last_date))
# just get columns from my_postcode that match forecast
my_postcode <- select(my_postcode, locality, year_month, Total_PV_kW)
# now bind to get all in one dataset for this locality
my_Total_PV_kW_df <- rbind(my_postcode,df_fts_less)
# and combine with the total for all localities
Total_PV_kW_df <- rbind(Total_PV_kW_df,my_Total_PV_kW_df)
}
# =============> now apply monthly generation factor to each month.....
# this can be applied once on the dataframe containing all localities
kWh_factor <- c(
109.821,
102.139,
102.746,
88.205,
88.160,
77.755,
91.697,
102.336,
108.342,
110.870,
111.866,
106.062 )
Total_PV_kWh_df <- Total_PV_kW_df %>% mutate(Total_PV_kWh = Total_PV_kW * kWh_factor[month(year_month)])
# remove the kW column
Total_PV_kWh_df <- Total_PV_kWh_df %>% select(- Total_PV_kW)
# =========== cool to here - all localities =========
Now put all values in a single dataframe and calculate new columns
locality
year_month
A. Metered Energex
B. Exported Energex
C. Generated CER x generation curve
D. Self_Consume C - B
E. Total_Consume A + D
F. Grid A - B
G. Grid_RE some factor of F - maybe get from AEMO fuel mix (outside daytime)
H. Percent RE 100 x ( C + G ) / E
# ====== put all data in a
# single data frame, with observations & calculated values in separate columns
# then use ggplot as per following suggestion
# https://stackoverflow.com/questions/71844183/plotting-multiple-columns-in-r-ggplot-and-have-the-legend-right
# ==========================
# so we should end up with data frame with columns - that cover actual + forecast
# should all be trimmed to have the same start date (ie CER data)
# locality
# year_month
# A. Metered Energex
# B. Exported Energex
# C. Generated CER x generation curve
# D. Self_Consume C - B
# E. Total_Consume A + D
# F. Grid A - B
# G. Grid_RE some factor of F - maybe get from AEMO fuel mix (outside daytime)
# H. Percent RE 100 x ( C + G ) / E
# =========== Following section gets metered_df + exported_df =======
rdsfilename = "EQld_consumption_Noosa_LGA.rds"
Noosa_LGA_consump <- read_rds(rdsfilename) # %>% filter(locality == mylocality)
# get rid of na
Noosa_LGA_consump <- Noosa_LGA_consump %>% drop_na()
rdsfilename = "consumption_forecast.rds"
consumption_forecast_df <- read_rds(rdsfilename)
# get actual + forecast CONSUMPTION
my_LGA <- Noosa_LGA_consump %>%
select(locality, year_month, total_kWh)
df_fts <- consumption_forecast_df
# This is "A"
metered_df <- rbind(my_LGA, df_fts) %>% drop_na()
# get actual + forecast SOLAR EXPORT
rdsfilename = "solar_export_forecast.rds"
solar_export_forecast_df <- read_rds(rdsfilename)
my_LGA <- Noosa_LGA_consump %>%
select(locality, year_month, solar_kWh)
df_fts <- solar_export_forecast_df
# This is "B"
exported_df <- rbind(my_LGA, df_fts) %>% drop_na()
# ============= Combine all actuals + forecast into one dataset =========
# join metered + exported
df1 <-left_join(metered_df,exported_df, by=c('locality'='locality', 'year_month'='year_month'))
# and join generated
df2 <-left_join(df1, Total_PV_kWh_df, by=c('locality'='locality', 'year_month'='year_month'))
# Note - RE_fuel_mix_pc should have ramping
RE_fuel_mix_pc = 10
df3 <-
df2 %>%
mutate(metered = total_kWh/1000000,
exported = solar_kWh/1000000,
generated = Total_PV_kWh/1000000) %>%
mutate(self_consume = generated - exported,
grid = metered - exported) %>%
mutate(total_consume = metered + self_consume) %>%
mutate(grid_RE = RE_fuel_mix_pc * grid / 100) %>%
mutate(pc_RE = 100 * (generated + grid_RE) / total_consume)
# This looks good so save it
# check if file exists, and if so delete
rds_filename = "Noosa RE pc actual and projected.rds"
if(file.exists(rds_filename)) {
file.remove(rds_filename)
}
## [1] TRUE
saveRDS(df3,file = rds_filename)
# Now plot %RE for all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
p3 <- ggplot(df3, #%>% filter(locality == mylocality)
aes(year_month)) + # Create ggplot2 plot
geom_line(aes(y = pc_RE)) +
labs(title = "Actual/Projected % RE",
y = "% RE", x= "year") +
facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
df3 %>% # Summary by group using dplyr
group_by(locality) %>%
summarize(max_pc_RE = max(pc_RE))
## # A tibble: 9 × 2
## locality max_pc_RE
## <chr> <dbl>
## 1 4563 58.5
## 2 4565 58.4
## 3 4566 41.2
## 4 4567 36.5
## 5 4568 56.2
## 6 4569 63.1
## 7 4571 58.3
## 8 4573 NA
## 9 NOOSA SHIRE 47.1
# Now plot SOLAR EXPORT all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
aes(year_month)) + # Create ggplot2 plot
geom_line(aes(y = exported)) +
labs(title = paste("Actual/Projected Solar Export",mylocality),
y = "solar export (GWh/month)", x= "year") # +
# facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
# Now plot GENERATED all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
aes(year_month)) + # Create ggplot2 plot
geom_line(aes(y = generated)) +
labs(title = paste("Actual/Projected Solar Generated",mylocality),
y = "solar generated (GWh/month)", x= "year") # +
# facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")
# Now plot METERED all localities - use facet_wrap.....
# but could plot individual localities - uncomment code below
mylocality = "NOOSA SHIRE"
p3 <- ggplot(df3 %>% filter(locality == mylocality),
aes(year_month)) + # Create ggplot2 plot
geom_line(aes(y = metered)) +
labs(title = paste("Actual/Projected - Metered Consumption",mylocality),
y = "metered consumption (GWh/month)", x= "year") # +
# facet_wrap(~ locality)
# p3
ggplotly(p3, tooltip = "text")