Problem Definition
In the xtsdata (csv file) there are 5 columns.
The aim is to use the columns (RUB_sol and MFA_sol) and predict 30 data points each for the two columns. Auto Regressive Integrated Moving Average
- Visualize Time Series
- Check Stationarity
- Plot ACF / PACF Charts
- Build ARIMA Model
- Make Forecast

Data Location
The data is available as a csv file named xtsdata.csv

Data Description
The data consists of 5 columns.
time15
RUB_sol
MFA_sol
NFA_sol
NFY_sol
SFY_baro_air
NFA_baro_air

Setup

Load Libs

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(tseries) 
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(forecast)
library(quantmod)
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(devtools)
library(ggfortify)
## Loading required package: ggplot2
## 
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
## 
##     gglagplot

Read Data

setwd("D:/Welingkar/Trim 4/Machine Learning/Datasets/Lecture 6-7")
dfrModel <- read.csv("./xtsdata.csv", header=T, stringsAsFactors=F)
head(dfrModel)
##           time15 RUB_sol MFA_sol NFA_sol NFY_sol SFY_baro_air NFA_baro_air
## 1 4/30/2012 0:15  11.929  12.689   11.26    8.19         9.43       13.134
## 2 4/30/2012 0:30  11.879  12.627   11.28    8.18         9.25       12.925
## 3 4/30/2012 0:45  11.828  12.570   11.26    8.21         9.03       12.736
## 4 4/30/2012 1:00  11.779  12.511   11.23    8.22         8.82       12.605
## 5 4/30/2012 1:15  11.730  12.459   11.17    8.23         8.60       12.455
## 6 4/30/2012 1:30  11.682  12.397   11.15    8.24         8.42       12.335

Create Extensible Time Series

names(dfrModel)
## [1] "time15"       "RUB_sol"      "MFA_sol"      "NFA_sol"     
## [5] "NFY_sol"      "SFY_baro_air" "NFA_baro_air"
## This is done to prevent date being read as a string. xts needs it to be a date object.
dfrModel$time15 <- as.POSIXlt(dfrModel$time15,format="%m/%d/%Y %H:%M") 
xtsData <- xts(dfrModel, order.by=dfrModel$time15)
head(xtsData)
##                     time15                RUB_sol  MFA_sol  NFA_sol
## 2012-04-30 00:15:00 "2012-04-30 00:15:00" "11.929" "12.689" "11.26"
## 2012-04-30 00:30:00 "2012-04-30 00:30:00" "11.879" "12.627" "11.28"
## 2012-04-30 00:45:00 "2012-04-30 00:45:00" "11.828" "12.570" "11.26"
## 2012-04-30 01:00:00 "2012-04-30 01:00:00" "11.779" "12.511" "11.23"
## 2012-04-30 01:15:00 "2012-04-30 01:15:00" "11.730" "12.459" "11.17"
## 2012-04-30 01:30:00 "2012-04-30 01:30:00" "11.682" "12.397" "11.15"
##                     NFY_sol SFY_baro_air NFA_baro_air
## 2012-04-30 00:15:00 " 8.19" " 9.43"      "13.134"    
## 2012-04-30 00:30:00 " 8.18" " 9.25"      "12.925"    
## 2012-04-30 00:45:00 " 8.21" " 9.03"      "12.736"    
## 2012-04-30 01:00:00 " 8.22" " 8.82"      "12.605"    
## 2012-04-30 01:15:00 " 8.23" " 8.60"      "12.455"    
## 2012-04-30 01:30:00 " 8.24" " 8.42"      "12.335"

Creating two separate Extensible Time Series for the two columns “RUB_sol”, “MFA_sol”
Creating Extensible Time Series for RUB_sol

xtsRUB_sol <-xts(dfrModel$RUB_sol,order.by =dfrModel$time15)
names(xtsRUB_sol)[1] <-paste("RUB_sol")
class(xtsRUB_sol)
## [1] "xts" "zoo"
head(xtsRUB_sol)
##                     RUB_sol
## 2012-04-30 00:15:00  11.929
## 2012-04-30 00:30:00  11.879
## 2012-04-30 00:45:00  11.828
## 2012-04-30 01:00:00  11.779
## 2012-04-30 01:15:00  11.730
## 2012-04-30 01:30:00  11.682

Creating Extensible Time Series for MFA_sol

xtsMFA_sol <-xts(dfrModel$MFA_sol,order.by =dfrModel$time15)
names(xtsMFA_sol)[1] <-paste("MFA_sol")
class(xtsMFA_sol)
## [1] "xts" "zoo"
head(xtsMFA_sol)
##                     MFA_sol
## 2012-04-30 00:15:00  12.689
## 2012-04-30 00:30:00  12.627
## 2012-04-30 00:45:00  12.570
## 2012-04-30 01:00:00  12.511
## 2012-04-30 01:15:00  12.459
## 2012-04-30 01:30:00  12.397

PART-1-(xtsRUB_SOL)

Dealing first with xtsRUB_sol
Xts Info

cat("\nSummary:\n")
## 
## Summary:
summary(xtsRUB_sol)
##      Index                        RUB_sol      
##  Min.   :2012-04-30 00:15:00   Min.   : 9.489  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.906  
##  Median :2012-05-31 18:07:30   Median :16.043  
##  Mean   :2012-05-31 18:07:30   Mean   :15.956  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:17.971  
##  Max.   :2012-07-02 12:00:00   Max.   :22.439
cat("\nStart:\n")
## 
## Start:
start(xtsRUB_sol)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
## 
## Ends:
end(xtsRUB_sol)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
## 
## Freq:
frequency(xtsRUB_sol)
## [1] 0.001111111
cat("\nIndex:\n")
## 
## Index:
head(index(xtsRUB_sol))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
## 
## Periodicity:
periodicity(xtsRUB_sol)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nYearly OHLC:\n")
## 
## Yearly OHLC:
to.yearly(xtsRUB_sol)
##            xtsRUB_sol.Open xtsRUB_sol.High xtsRUB_sol.Low xtsRUB_sol.Close
## 2012-07-02          11.929          22.439          9.489            20.55
cat("\nYearly Mean:\n")
## 
## Yearly Mean:
lapply(split(xtsRUB_sol,f="years"),FUN=mean)
## [[1]]
## [1] 15.95573
cat("\nQuarterly OHLC:\n")
## 
## Quarterly OHLC:
head(to.quarterly(xtsRUB_sol))
##         xtsRUB_sol.Open xtsRUB_sol.High xtsRUB_sol.Low xtsRUB_sol.Close
## 2012 Q2          11.929          22.439          9.489           18.997
## 2012 Q3          18.960          21.896         17.883           20.550
cat("\nQuarterly Mean:\n")
## 
## Quarterly Mean:
head(lapply(split(xtsRUB_sol,f="quarters"),FUN=mean))
## [[1]]
## [1] 15.87123
## 
## [[2]]
## [1] 19.42347
cat("\nMonthly OHLC:\n")
## 
## Monthly OHLC:
head(to.monthly(xtsRUB_sol))
##          xtsRUB_sol.Open xtsRUB_sol.High xtsRUB_sol.Low xtsRUB_sol.Close
## Apr 2012          11.929          13.665         10.914           12.589
## May 2012          12.546          18.891          9.489           17.254
## Jun 2012          17.207          22.439         12.995           18.997
## Jul 2012          18.960          21.896         17.883           20.550
cat("\nMonthly Mean:\n")
## 
## Monthly Mean:
head(lapply(split(xtsRUB_sol,f="months"),FUN=mean))
## [[1]]
## [1] 12.27673
## 
## [[2]]
## [1] 14.09251
## 
## [[3]]
## [1] 17.82781
## 
## [[4]]
## [1] 19.42347

Plot xts

autoplot(xtsRUB_sol, ts.colour='blue') +
    labs(title="Times Series") +
    labs(x="Date and Month") +
    labs(y="RUB_sol")

Observation
1. Graph is shown as having frequent fluctuations in data.
2. This is not like a stationary data.
3. As there are no seasonality so we will not decompose the data.

ADF Test

# Augmented Dickey-Fuller Test
adf.test(xtsRUB_sol, alternative="stationary", k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsRUB_sol
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary

Observation
Null Hypothesis: Data is not stationary Alternative Hypothesis: Data is stationary

As we can see that P value is more than 0.05 so at 5% Level of Significance it is not able to reject Null Hypothesis.

This implies the data is not stationary.

Plot ACF- Auto Correlation Function

# Auto Correlation Function
autoplot(acf(xtsRUB_sol, plot = FALSE))

Observation
This plot should be greater than zero.
This displays the ACF graph as mentioned by ARIMA with values above the zero line.

Plot PACF- Partial Auto Correlation Function

#acf(diff(log(xtsD1)))
autoplot(pacf(xtsRUB_sol, plot = FALSE))

Observation
This plot should be below zero as well as above zero which is fulfilling ARIMA model properties.
This displays the ACF graph as mentioned by ARIMA with values below the zero line.

ARIMA
ARIMA (Auto Regressive Integrated Moving Averages)
- ARIMA Models Provide Another Approach To Time Series Forecasting
- ARIMA Models Are, In Theory, The Most General Class Of Models For Forecasting A Time Series.
- Arima Models Aim To Describe The Autocorrelations In The Data
- Pre Requisite Of ARIMA Is That The Time Series Needs To Be Stationary.
- A Stationary Series Has No Trend, Its Variations Around Its Mean Have A Constant Amplitude, And It Wiggles In A Consistent Fashion,
I.E., Its Short-term Random Time Patterns Always Look The Same In A Statistical Sense
- This is a modeling approach that can be used to calculate the probability of future value lying between two specified limits

THE MAIN ASSUMPTION IN ARIMA TECHNIQUES IS THAT THE DATA MUST BE STATIONARY

Unless Time Series Is Stationary, An ARIMA Model Can Not Be Built

Conclusion from the tests:
ADF, ACF and PACF were used to check if the data was stationery.

Observations were:
- ADF indicated a p value greater than 0.05 (ideally should be less than 0.05)
- ACF and PACF behaved like the expected graphs for ARIMA.

Ideally, ADF is the main criteria to decide whether the data is stationary or not. ACF and PACF act as a support.
In reality if ADF is greater than 0.05 then ARIMA should be avoided.

For Academic purpose we are going with ARIMA model for practive even data is not stationary.

Make ARIMA Model

# get arima model (find best model)
armModel <- auto.arima(xtsRUB_sol)
armModel
## Series: xtsRUB_sol 
## ARIMA(5,1,1)                    
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ar5      ma1
##       1.9571  -1.0185  0.1645  -0.1499  0.0409  -0.9766
## s.e.  0.0131   0.0282  0.0309   0.0282  0.0130   0.0029
## 
## sigma^2 estimated as 0.0003703:  log likelihood=15431.69
## AIC=-30849.37   AICc=-30849.36   BIC=-30802.37

Forecast Using ARIMA Model

# forecast using
fcData <- forecast(armModel,h=30)
fcData
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 5486401       20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301       20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201       21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101       21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001       21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901       21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801       21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701       21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601       22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501       22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401       22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301       22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201       22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101       22.38964 21.71337 23.06591 21.35537 23.42390
## 5499001       22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901       22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801       22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701       22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601       22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501       22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401       22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301       22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201       22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101       22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001       22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901       21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801       21.86761 20.52110 23.21411 19.80831 23.92690
## 5510701       21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601       21.64051 20.22193 23.05908 19.47098 23.81003
## 5512501       21.51796 20.06711 22.96882 19.29908 23.73685

Plot Forecast Using ARIMA Model

autoplot(fcData)

Observation
Blue colour data lines are showing the forecasted values.

PART-2-(MFA_SOL)

Dealing with xtsMFA_sol
Xts Info

cat("\nSummary:\n")
## 
## Summary:
summary(xtsMFA_sol)
##      Index                        MFA_sol     
##  Min.   :2012-04-30 00:15:00   Min.   :10.54  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.30  
##  Median :2012-05-31 18:07:30   Median :15.09  
##  Mean   :2012-05-31 18:07:30   Mean   :14.92  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:16.45  
##  Max.   :2012-07-02 12:00:00   Max.   :19.71
cat("\nStart:\n")
## 
## Start:
start(xtsMFA_sol)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
## 
## Ends:
end(xtsMFA_sol)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
## 
## Freq:
frequency(xtsMFA_sol)
## [1] 0.001111111
cat("\nIndex:\n")
## 
## Index:
head(index(xtsMFA_sol))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
## 
## Periodicity:
periodicity(xtsMFA_sol)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nYearly OHLC:\n")
## 
## Yearly OHLC:
to.yearly(xtsMFA_sol)
##            xtsMFA_sol.Open xtsMFA_sol.High xtsMFA_sol.Low xtsMFA_sol.Close
## 2012-07-02          12.689          19.709         10.541           17.819
cat("\nYearly Mean:\n")
## 
## Yearly Mean:
lapply(split(xtsMFA_sol,f="years"),FUN=mean)
## [[1]]
## [1] 14.92437
cat("\nQuarterly OHLC:\n")
## 
## Quarterly OHLC:
head(to.quarterly(xtsMFA_sol))
##         xtsMFA_sol.Open xtsMFA_sol.High xtsMFA_sol.Low xtsMFA_sol.Close
## 2012 Q2          12.689          19.709         10.541           16.295
## 2012 Q3          16.252          18.691         15.603           17.819
cat("\nQuarterly Mean:\n")
## 
## Quarterly Mean:
head(lapply(split(xtsMFA_sol,f="quarters"),FUN=mean))
## [[1]]
## [1] 14.87504
## 
## [[2]]
## [1] 16.94893
cat("\nMonthly OHLC:\n")
## 
## Monthly OHLC:
head(to.monthly(xtsMFA_sol))
##          xtsMFA_sol.Open xtsMFA_sol.High xtsMFA_sol.Low xtsMFA_sol.Close
## Apr 2012          12.689          13.855         11.579           12.769
## May 2012          12.701          17.203         10.541           15.834
## Jun 2012          15.796          19.709         13.883           16.295
## Jul 2012          16.252          18.691         15.603           17.819
cat("\nMonthly Mean:\n")
## 
## Monthly Mean:
head(lapply(split(xtsMFA_sol,f="months"),FUN=mean))
## [[1]]
## [1] 12.69096
## 
## [[2]]
## [1] 13.41241
## 
## [[3]]
## [1] 16.45846
## 
## [[4]]
## [1] 16.94893

Plot xts

autoplot(xtsMFA_sol, ts.colour='blue') +
    labs(title="Times Series Plot") +
    labs(x="Date and Month") +
    labs(y="MFA_sol")

Observation
1. Graph is shown as having frequent fluctuations in data.
2. This is not like a stationary data.
3. As there are no seasonality so we will not decompose the data.

ADF Test

# Augmented Dickey-Fuller Test
adf.test(xtsMFA_sol, alternative="stationary", k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsMFA_sol
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary

Observation
Null Hypothesis: Data is not stationary Alternative Hypothesis: Data is stationary

As we can see that P value is more than 0.05 so at 5% Level of Significance it is not able to reject Null Hypothesis.

This implies the data is not stationary.

Plot ACF

# Auto Correlation Function
autoplot(acf(xtsMFA_sol, plot = FALSE))

Observation
This plot should be greater than zero.
This displays the ACF graph as mentioned by ARIMA with values above the zero line.

Plot PACF

#acf(diff(log(xtsD1)))
autoplot(pacf(xtsMFA_sol, plot = FALSE))

Observation
This plot should be below zero as well as above zero which is fulfilling ARIMA model properties.
This displays the ACF graph as mentioned by ARIMA with values below the zero line.

ARIMA
ARIMA (Auto Regressive Integrated Moving Averages)
- ARIMA Models Provide Another Approach To Time Series Forecasting
- ARIMA Models Are, In Theory, The Most General Class Of Models For Forecasting A Time Series.
- Arima Models Aim To Describe The Autocorrelations In The Data
- Pre Requisite Of ARIMA Is That The Time Series Needs To Be Stationary.
- A Stationary Series Has No Trend, Its Variations Around Its Mean Have A Constant Amplitude, And It Wiggles In A Consistent Fashion,
I.E., Its Short-term Random Time Patterns Always Look The Same In A Statistical Sense
- This is a modeling approach that can be used to calculate the probability of future value lying between two specified limits

THE MAIN ASSUMPTION IN ARIMA TECHNIQUES IS THAT THE DATA MUST BE STATIONARY

Unless Time Series Is Stationary, An ARIMA Model Can Not Be Built

Conclusion from the tests:
ADF, ACF and PACF were used to check if the data was stationery.

Observations were:
- ADF indicated a p value greater than 0.05 (ideally should be less than 0.05)
- ACF and PACF behaved like the expected graphs for ARIMA.

Ideally, ADF is the main criteria to decide whether the data is stationary or not. ACF and PACF act as a support. In reality if ADF is greater than 0.05 then ARIMA should be avoided.

For Academic purpose we are going with ARIMA model for practive even data is not stationary.

Make ARIMA Model

# get arima model (find best model)
armModel1 <- auto.arima(xtsMFA_sol)
armModel1
## Series: xtsMFA_sol 
## ARIMA(4,1,4)                    
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1     ar2      ar3      ar4     ma1      ma2     ma3     ma4
##       0.4867  0.7978  -0.3897  -0.0122  0.1243  -0.4434  0.2734  0.1057
## s.e.     NaN     NaN      NaN      NaN     NaN   0.0442     NaN     NaN
## 
## sigma^2 estimated as 0.0009886:  log likelihood=12441.15
## AIC=-24864.31   AICc=-24864.28   BIC=-24803.87

Forecast Using ARIMA Model

# forecast using
fcData1 <- forecast(armModel1,h=30)
fcData1
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 5486401       17.89277 17.85248 17.93307 17.83115 17.95440
## 5487301       17.95961 17.88321 18.03601 17.84276 18.07645
## 5488201       18.02245 17.90350 18.14141 17.84053 18.20438
## 5489101       18.08158 17.91339 18.24977 17.82435 18.33881
## 5490001       18.13355 17.90982 18.35729 17.79138 18.47572
## 5490901       18.18072 17.89786 18.46358 17.74812 18.61331
## 5491801       18.22133 17.87652 18.56615 17.69398 18.74868
## 5492701       18.25775 17.84990 18.66561 17.63399 18.88151
## 5493601       18.28887 17.81725 18.76050 17.56758 19.01016
## 5494501       18.31667 17.78157 18.85178 17.49830 19.13505
## 5495401       18.34034 17.74214 18.93855 17.42547 19.25522
## 5496301       18.36147 17.70115 19.02180 17.35160 19.37135
## 5497201       18.37943 17.65793 19.10093 17.27600 19.48287
## 5498101       18.39547 17.61408 19.17686 17.20044 19.59050
## 5499001       18.40908 17.56900 19.24915 17.12430 19.69386
## 5499901       18.42124 17.52388 19.31860 17.04884 19.79364
## 5500801       18.43155 17.47819 19.38490 16.97351 19.88958
## 5501701       18.44077 17.43281 19.44873 16.89923 19.98231
## 5502601       18.44858 17.38732 19.50984 16.82552 20.07163
## 5503501       18.45557 17.34235 19.56879 16.75305 20.15809
## 5504401       18.46148 17.29755 19.62542 16.68140 20.24157
## 5505301       18.46679 17.25339 19.68018 16.61106 20.32251
## 5506201       18.47126 17.20959 19.73294 16.54170 20.40082
## 5507101       18.47528 17.16650 19.78407 16.47367 20.47690
## 5508001       18.47868 17.12387 19.83348 16.40668 20.55067
## 5508901       18.48172 17.08197 19.88148 16.34098 20.62246
## 5509801       18.48429 17.04059 19.92799 16.27635 20.69224
## 5510701       18.48660 16.99995 19.97326 16.21296 20.76025
## 5511601       18.48855 16.95985 20.01724 16.15061 20.82648
## 5512501       18.49030 16.92047 20.06013 16.08945 20.89115

Plot Forecast Using ARIMA Model

autoplot(fcData1)

Observation
Blue colour data lines are showing the forecasted values.

SUMMARY

  1. Data is loaded from the file
    -As there are multipele records for every day so we are not going Montly or Yearly wise
  2. Data is seperated as per the requirement of the project.
  3. In Time series we have normal time series and extended time series, according to that we use Moving average or ARIMA model for forecasting.
  4. Seperate Data frames were created for two different columns and all the assumption were checked before creating the model.
    • A stationary process has the property that the mean, variance and autocorrelation structure do not change over time.
    • Unless Time Series Is Stationary, An ARIMA Model Can Not Be Built
  5. After checking the ACF Test, results wee showing data in not stationary for both of the columns while other condition met for ARIMA Model.
  6. Still for Academic purpose we built ARIMA model with given data and doing the forecasting from the same.