###GENARATION AND VISUALIZATION OF A TIME SERIES DATA
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(urca)
library(TTR)
set.seed(123)
n<-300 #Number of months in 25 years
dates<- seq(as.Date("2000-01-01"), by= "months",length.out = n)
trend<- seq(100,400 ,length.out = n)
seasonality <- 50* sin(2* pi*(1:n)/12)
noise <- rnorm(n, mean = 34,sd= 60)
#Final time series data
data<- data.frame(date = dates,
value = trend + seasonality+ noise)
head(dates)
## [1] "2000-01-01" "2000-02-01" "2000-03-01" "2000-04-01" "2000-05-01"
## [6] "2000-06-01"
View(dates)
#Converting into time series format “ts_data” ##From the data set “data” obtain “value” which starts from 2000 data 1,,with a #frequency of 12 months
ts_data<-ts(data$value,start= c(2000,1),frequency=12)
#Plotting time series
autoplot(ts_data) +
ggtitle("Synthetic Time Series Plot of the Generated Data") +
ylab("Value") +xlab("Time")
#Checking fo Stationarity by Augmented Dickey-Fuller(ADF)Test and ##Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test
#Augmented Dickey Fuller (ADF)test: (Null hypothesis: Non-stationary)
adf_test <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
print(adf_test)
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -10.107, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#KPSS Test:Null hypothesis: stationary
kpss_test<- kpss.test(ts_data)
## Warning in kpss.test(ts_data): p-value smaller than printed p-value
print(kpss_test)
##
## KPSS Test for Level Stationarity
##
## data: ts_data
## KPSS Level = 4.5369, Truncation lag parameter = 5, p-value = 0.01
#Step 4: Differencing to Achieve Stationarity (if needed) #KPSS above shows there exist non-stationality which needs to be removed
diff_ts_data <- diff(ts_data)
autoplot(diff_ts_data) +
ggtitle("Differenced Time Series") +
ylab("Differenced Value") + xlab("Time")
###Re-run stationarity tests ###Augmented Dickey-Fuller(ADF)Test
adf.test(diff_ts_data)
## Warning in adf.test(diff_ts_data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_ts_data
## Dickey-Fuller = -11.866, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
###KPSS Test
kpss.test(diff_ts_data)
## Warning in kpss.test(diff_ts_data): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff_ts_data
## KPSS Level = 0.010288, Truncation lag parameter = 5, p-value = 0.1
#Decompose Time Series: Use decompose() to separate the trend, seasonality, and #residuals.
library(stats)
decomposed_data<- decompose(ts_data)
plot(decomposed_data)
#Alternatively
library(ggplot2)
library(forecast)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#STL decomposition(Seasonal and Trend decomposition using LOESS)
decomposed_data<-stl(ts_data,s.window="periodic")
plot(decomposed_data)
##Autocorrelation Function(ACF):Use acf()to analyze autocorrelation.
par(mfrow = c(1,2))#### separates (ACF) and (PACF)
acf(ts_data, lag.max = 20)
pacf(ts_data, lag.max = 20)
#Build an ARIMA Model: Fit an ARIMA model using auto.arima() from the forecast package.
library(forecast)
arima_model <- auto.arima(ts_data)
arima_model
## Series: ts_data
## ARIMA(1,1,2)(2,0,0)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2
## -0.8365 -0.0052 -0.8734 0.1960 0.2226
## s.e. 0.0613 0.0415 0.0409 0.0611 0.0612
##
## sigma^2 = 4255: log likelihood = -1672.8
## AIC=3357.61 AICc=3357.89 BIC=3379.81
summary(arima_model)
## Series: ts_data
## ARIMA(1,1,2)(2,0,0)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2
## -0.8365 -0.0052 -0.8734 0.1960 0.2226
## s.e. 0.0613 0.0415 0.0409 0.0611 0.0612
##
## sigma^2 = 4255: log likelihood = -1672.8
## AIC=3357.61 AICc=3357.89 BIC=3379.81
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 8.653559 64.57576 50.10357 -7.554214 25.00679 0.744385
## ACF1
## Training set -0.007543781
#Forecast future values
forecast_values <- forecast(arima_model, h = 15) # Forecast next 15 periods
forecast_values
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2025 435.9422 352.3449 519.5396 308.0911 563.7933
## Feb 2025 475.1268 390.4875 559.7660 345.6822 604.5713
## Mar 2025 471.5118 386.8676 556.1560 342.0597 600.9640
## Apr 2025 481.9080 396.5618 567.2542 351.3822 612.4338
## May 2025 436.8145 351.4622 522.1668 306.2794 567.3495
## Jun 2025 452.2677 366.4098 538.1255 320.9594 583.5760
## Jul 2025 456.4655 370.5748 542.3561 325.1070 587.8239
## Aug 2025 427.3039 341.0255 513.5823 295.3525 559.2553
## Sep 2025 443.2285 356.8862 529.5708 311.1794 575.2776
## Oct 2025 415.5022 328.8454 502.1591 282.9720 548.0325
## Nov 2025 435.2909 348.5425 522.0393 302.6206 567.9612
## Dec 2025 484.2717 397.2554 571.2879 351.1917 617.3516
## Jan 2026 446.9864 357.5103 536.4625 310.1445 583.8283
## Feb 2026 467.4075 377.4769 557.3381 329.8706 604.9444
## Mar 2026 479.0374 388.9896 569.0852 341.3212 616.7536
plot(forecast_values)
#Plot only the forecast with 95% confidence intervals without the Data
autoplot(forecast_values,include=0)+
ggtitle("ARIMAForecast(Next15Months)") +
ylab("PredictedValue") +xlab("Time")