###GENARATION AND VISUALIZATION OF A TIME SERIES DATA

library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tseries)
library(urca)
library(TTR)
set.seed(123)
n<-300     #Number of months in 25 years
dates<- seq(as.Date("2000-01-01"), by= "months",length.out = n)
trend<- seq(100,400 ,length.out = n)
seasonality <- 50* sin(2* pi*(1:n)/12)
noise <- rnorm(n, mean = 34,sd= 60)                      

#Final time series data

data<- data.frame(date = dates,
                  value = trend + seasonality+ noise)

head(dates)
## [1] "2000-01-01" "2000-02-01" "2000-03-01" "2000-04-01" "2000-05-01"
## [6] "2000-06-01"
View(dates)

#Converting into time series format “ts_data” ##From the data set “data” obtain “value” which starts from 2000 data 1,,with a #frequency of 12 months

ts_data<-ts(data$value,start= c(2000,1),frequency=12)

#Plotting time series

autoplot(ts_data) +
  ggtitle("Synthetic Time Series Plot of the Generated Data") +
  ylab("Value") +xlab("Time")

#Checking fo Stationarity by Augmented Dickey-Fuller(ADF)Test and ##Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test

#Augmented Dickey Fuller (ADF)test: (Null hypothesis: Non-stationary)

adf_test <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
print(adf_test)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -10.107, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

#KPSS Test:Null hypothesis: stationary

kpss_test<- kpss.test(ts_data)
## Warning in kpss.test(ts_data): p-value smaller than printed p-value
print(kpss_test)
## 
##  KPSS Test for Level Stationarity
## 
## data:  ts_data
## KPSS Level = 4.5369, Truncation lag parameter = 5, p-value = 0.01

#Step 4: Differencing to Achieve Stationarity (if needed) #KPSS above shows there exist non-stationality which needs to be removed

First differencing if needed

diff_ts_data <- diff(ts_data)

Plot differenced data

autoplot(diff_ts_data) +
  ggtitle("Differenced Time Series") +
  ylab("Differenced Value") + xlab("Time")

###Re-run stationarity tests ###Augmented Dickey-Fuller(ADF)Test

adf.test(diff_ts_data)
## Warning in adf.test(diff_ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_ts_data
## Dickey-Fuller = -11.866, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
###KPSS Test
kpss.test(diff_ts_data)
## Warning in kpss.test(diff_ts_data): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_ts_data
## KPSS Level = 0.010288, Truncation lag parameter = 5, p-value = 0.1

#Decompose Time Series: Use decompose() to separate the trend, seasonality, and #residuals.

library(stats) 
decomposed_data<- decompose(ts_data)
plot(decomposed_data)

#Alternatively

library(ggplot2)
library(forecast)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#STL decomposition(Seasonal and Trend decomposition using LOESS)

decomposed_data<-stl(ts_data,s.window="periodic")
plot(decomposed_data)

##Autocorrelation Function(ACF):Use acf()to analyze autocorrelation.

par(mfrow = c(1,2))#### separates (ACF) and (PACF)

Example: Autocorrelation function

acf(ts_data, lag.max = 20)

Step 7. Partial Autocorrelation Function (PACF)

pacf(ts_data, lag.max = 20)

#Build an ARIMA Model: Fit an ARIMA model using auto.arima() from the forecast package.

Example: Fit an ARIMA model

library(forecast)
arima_model <- auto.arima(ts_data)
arima_model
## Series: ts_data 
## ARIMA(1,1,2)(2,0,0)[12] 
## 
## Coefficients:
##           ar1      ma1      ma2    sar1    sar2
##       -0.8365  -0.0052  -0.8734  0.1960  0.2226
## s.e.   0.0613   0.0415   0.0409  0.0611  0.0612
## 
## sigma^2 = 4255:  log likelihood = -1672.8
## AIC=3357.61   AICc=3357.89   BIC=3379.81
summary(arima_model)
## Series: ts_data 
## ARIMA(1,1,2)(2,0,0)[12] 
## 
## Coefficients:
##           ar1      ma1      ma2    sar1    sar2
##       -0.8365  -0.0052  -0.8734  0.1960  0.2226
## s.e.   0.0613   0.0415   0.0409  0.0611  0.0612
## 
## sigma^2 = 4255:  log likelihood = -1672.8
## AIC=3357.61   AICc=3357.89   BIC=3379.81
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE     MASE
## Training set 8.653559 64.57576 50.10357 -7.554214 25.00679 0.744385
##                      ACF1
## Training set -0.007543781

#Forecast future values

forecast_values <- forecast(arima_model, h = 15) # Forecast next 15 periods
forecast_values
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2025       435.9422 352.3449 519.5396 308.0911 563.7933
## Feb 2025       475.1268 390.4875 559.7660 345.6822 604.5713
## Mar 2025       471.5118 386.8676 556.1560 342.0597 600.9640
## Apr 2025       481.9080 396.5618 567.2542 351.3822 612.4338
## May 2025       436.8145 351.4622 522.1668 306.2794 567.3495
## Jun 2025       452.2677 366.4098 538.1255 320.9594 583.5760
## Jul 2025       456.4655 370.5748 542.3561 325.1070 587.8239
## Aug 2025       427.3039 341.0255 513.5823 295.3525 559.2553
## Sep 2025       443.2285 356.8862 529.5708 311.1794 575.2776
## Oct 2025       415.5022 328.8454 502.1591 282.9720 548.0325
## Nov 2025       435.2909 348.5425 522.0393 302.6206 567.9612
## Dec 2025       484.2717 397.2554 571.2879 351.1917 617.3516
## Jan 2026       446.9864 357.5103 536.4625 310.1445 583.8283
## Feb 2026       467.4075 377.4769 557.3381 329.8706 604.9444
## Mar 2026       479.0374 388.9896 569.0852 341.3212 616.7536

Plot the forecast

plot(forecast_values)

#Plot only the forecast with 95% confidence intervals without the Data

autoplot(forecast_values,include=0)+
  ggtitle("ARIMAForecast(Next15Months)") +
  ylab("PredictedValue") +xlab("Time")