R Markdown
# STEP 0: Load Required Packages
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(ggplot2)
library(scales) # for formatting axis labels
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# STEP 1: Load and Prepare CSV Data
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
collections_csv <- "C:/Users/accou/Downloads/WWF Analysis/WWFCollectionsAD.csv"
data <- read_csv(collections_csv)
## Rows: 66 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): AmountCollected
## date (1): DatePaid
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce DatePaid to Date class, then order the rows chronologically so the
# observations are in time order before the series is built.
data <- mutate(data, DatePaid = as.Date(DatePaid))
data <- arrange(data, DatePaid)
# STEP 2: Create Time Series Object
# The series starts at the year/month of the earliest payment date. ts() is
# given only the values, a start and frequency = 12, so each row is treated as
# the next consecutive month.
# NOTE(review): this assumes exactly one row per month with no gaps — confirm.
first_payment <- min(data$DatePaid)
start_year <- year(first_payment)
start_month <- month(first_payment)
ts_data <- ts(
  data$AmountCollected,
  start = c(start_year, start_month),
  frequency = 12
)
# STEP 3: Check Stationarity
# Augmented Dickey-Fuller test on the monthly series; the test's alternative
# hypothesis is that the series is stationary.
adf.test(ts_data)
## Warning in adf.test(ts_data): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = 1.269, Lag order = 4, p-value = 0.99
## alternative hypothesis: stationary
# If p-value > 0.05, the test gives no evidence of stationarity, so differencing is needed
# STEP 4: Fit ARIMA Model Automatically
# auto.arima() chooses the model specification from the series itself;
# summary() then prints the coefficients, information criteria and
# training-set error measures.
model <- auto.arima(ts_data)
summary(model)
## Series: ts_data
## ARIMA(1,1,0)(1,0,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## -0.6027 0.5264 5494.490
## s.e. 0.0976 0.1349 2161.523
##
## sigma^2 = 244747730: log likelihood = -720.63
## AIC=1449.26 AICc=1449.93 BIC=1457.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -149.643 15162.93 10282.28 -0.7234085 4.202458 0.2511172
## ACF1
## Training set 0.05659578
# STEP 5: Forecast Future Values
# Project the fitted model 60 periods ahead (the series has frequency 12, so
# this is 60 months) and draw the forecast object's default plot.
horizon_months <- 60
forecasted <- forecast(model, h = horizon_months)
plot(forecasted)

# STEP 6: Combine Forecast with Dates and Confidence Intervals
# Builds one row per forecast period: the date, the point forecast and the
# 80% / 95% interval bounds.
last_date <- max(data$DatePaid)
# Take the number of periods from the forecast itself so the date column can
# never drift out of step with the forecast length.
n_ahead <- length(forecasted$mean)
# Step forward with %m+%, which rolls back to the last valid day of a short
# month. seq(..., by = "month") instead overflows (e.g. "Feb 30" turns into
# early March), which mislabels the February forecasts.
first_future <- last_date %m+% months(1)
future_dates <- first_future %m+% months(seq_len(n_ahead) - 1L)
forecast_df <- data.frame(
  date = future_dates,
  forecast = as.numeric(forecasted$mean),
  # Select interval columns by level name rather than position so a change in
  # the forecast's `level` cannot silently swap or mislabel the bounds.
  lower80 = as.numeric(forecasted$lower[, "80%"]),
  upper80 = as.numeric(forecasted$upper[, "80%"]),
  lower95 = as.numeric(forecasted$lower[, "95%"]),
  upper95 = as.numeric(forecasted$upper[, "95%"])
)
# STEP 7: Visualize Actual vs Forecast with Confidence Ribbon
# Reshape both sources to shared `date` / `value` / `type` columns.
actual_df <- data %>%
  select(DatePaid, AmountCollected) %>%
  rename(date = DatePaid, value = AmountCollected) %>%
  mutate(type = "Actual")
forecast_plot_df <- forecast_df %>%
  rename(value = forecast) %>%
  mutate(type = "Forecast")
# Stacked actual + forecast rows; not used by the plot below, kept available
# for any later use.
combined_df <- bind_rows(actual_df, forecast_plot_df)
# Lines are sized with `linewidth`: using `size` for lines is deprecated since
# ggplot2 3.4.0 and raises a warning.
ggplot() +
  geom_line(
    data = actual_df,
    aes(x = date, y = value),
    color = "#1f77b4",
    linewidth = 1.2
  ) +
  geom_line(
    data = forecast_plot_df,
    aes(x = date, y = value),
    color = "#ff7f0e",
    linewidth = 1.2
  ) +
  # Shaded band spans the 95% interval bounds of the forecast.
  geom_ribbon(
    data = forecast_df,
    aes(x = date, ymin = lower95, ymax = upper95),
    fill = "#ff7f0e",
    alpha = 0.2
  ) +
  scale_y_continuous(labels = comma_format(accuracy = 1)) +
  labs(
    title = "ARIMA Forecast of WWF Collections with 95% Confidence Interval",
    x = "Date",
    y = "Amount Collected (ZAR)"
  ) +
  theme_minimal()

# STEP 8: Export Forecast to CSV
# The file name is relative, so the CSV lands in the current working directory.
output_file <- "arima_forecast_output.csv"
write_csv(forecast_df, output_file)
# STEP 9: Preview the last rows of the forecast
# tail() is called without `n`, so it prints its default of 6 rows (not 10).
tail(forecast_df)
## date forecast lower80 upper80 lower95 upper95
## 55 2030-03-30 791437.7 635091.8 947783.5 552327.4 1030548
## 56 2030-04-30 794516.3 636131.6 952900.9 552287.8 1036745
## 57 2030-05-30 801696.3 641294.5 962098.1 556382.9 1047010
## 58 2030-06-30 805679.9 643288.5 968071.3 557323.7 1054036
## 59 2030-07-30 814452.1 650093.7 978810.5 563087.6 1065817
## 60 2030-08-30 819950.5 653649.3 986251.8 565614.7 1074286